fix build

Update binggan requirement from 0.10.0 to 0.12.0
--- updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>
2026-01-06 17:22:54 +00:00 · 2024-10-16 10:33:24 +08:00 · 2024-10-15 20:05:11 +00:00
342 changed files with 7013 additions and 21133 deletions
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -15,11 +15,11 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install Rust
-        run: rustup toolchain install nightly-2025-12-01 --profile minimal --component llvm-tools-preview
+        run: rustup toolchain install nightly-2024-07-01 --profile minimal --component llvm-tools-preview
      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Generate code coverage
-        run: cargo +nightly-2025-12-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
+        run: cargo +nightly-2024-07-01 llvm-cov --all-features --workspace --doctests --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        continue-on-error: true
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -39,11 +39,11 @@ jobs:
    - name: Check Formatting
      run: cargo +nightly fmt --all -- --check
-
+    
    - name: Check Stable Compilation
      run: cargo build --all-features
-
+    
    - name: Check Bench Compilation
      run: cargo +nightly bench --no-run --profile=dev --all-features
@@ -59,10 +59,10 @@ jobs:
    strategy:
      matrix:
-        features:
+        features: [
-          - { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints,stemmer" }
+            { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints" },
-          - { label: "quickwit", flags: "mmap,quickwit,failpoints" }
+            { label: "quickwit", flags: "mmap,quickwit,failpoints" }
-          - { label: "none", flags: "" }
+        ]
    name: test-${{ matrix.features.label}}
@@ -80,21 +80,7 @@ jobs:
    - uses: Swatinem/rust-cache@v2
    - name: Run tests
-      run: |
+      run: cargo +stable nextest run --features ${{ matrix.features.flags }} --verbose --workspace
        # if matrix.feature.flags is empty then run on --lib to avoid compiling examples
        # (as most of them rely on mmap) otherwise run all
        if [ -z "${{ matrix.features.flags }}" ]; then
          cargo +stable nextest run --lib --no-default-features --verbose --workspace
        else
          cargo +stable nextest run --features ${{ matrix.features.flags }} --no-default-features --verbose --workspace
        fi
    - name: Run doctests
-      run: |
+      run: cargo +stable test --doc --features ${{ matrix.features.flags }} --verbose --workspace
        # if matrix.feature.flags is empty then run on --lib to avoid compiling examples
        # (as most of them rely on mmap) otherwise run all
        if [ -z "${{ matrix.features.flags }}" ]; then
          echo "no doctest for no feature flag"
        else
          cargo +stable test --doc --features ${{ matrix.features.flags }} --verbose --workspace
        fi
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -46,7 +46,7 @@ The file of a segment has the format
 ```segment-id . ext```
-The extension signals which data structure (or [`SegmentComponent`](src/index/segment_component.rs)) is stored in the file.
+The extension signals which data structure (or [`SegmentComponent`](src/core/segment_component.rs)) is stored in the file.
 A small `meta.json` file is in charge of keeping track of the list of segments, as well as the schema.
@@ -102,7 +102,7 @@ but users can extend tantivy with their own implementation.
 Tantivy's document follows a very strict schema, decided before building any index.
-The schema defines all of the fields that the indexes [`Document`](src/schema/document/mod.rs) may and should contain, their types (`text`, `i64`, `u64`, `Date`, ...) as well as how it should be indexed / represented in tantivy.
+The schema defines all of the fields that the indexes [`Document`](src/schema/document.rs) may and should contain, their types (`text`, `i64`, `u64`, `Date`, ...) as well as how it should be indexed / represented in tantivy.
 Depending on the type of the field, you can decide to
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,42 +1,11 @@
-Tantivy 0.25
+Tantivy 0.23 - Unreleased
 ================================
-
+Tantivy 0.23 will be backwards compatible with indices created with v0.22 and v0.21.
 ## Bugfixes
 - fix union performance regression in tantivy 0.24 [#2663](https://github.com/quickwit-oss/tantivy/pull/2663)(@PSeitz)
 - make zstd optional in sstable [#2633](https://github.com/quickwit-oss/tantivy/pull/2633)(@Parth)
 - Fix TopDocs::order_by_string_fast_field for asc order [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz)
 ## Features/Improvements
 - add docs/example and Vec<u32> values to sstable [#2660](https://github.com/quickwit-oss/tantivy/pull/2660)(@PSeitz)
 - Add string fast field support to `TopDocs`. [#2642](https://github.com/quickwit-oss/tantivy/pull/2642)(@stuhood)
 - update edition to 2024 [#2620](https://github.com/quickwit-oss/tantivy/pull/2620)(@PSeitz)
 - Allow optional spaces between the field name and the value in the query parser [#2678](https://github.com/quickwit-oss/tantivy/pull/2678)(@Darkheir)
 - Support mixed field types in query parser [#2676](https://github.com/quickwit-oss/tantivy/pull/2676)(@trinity-1686a)
 - Add per-field size details [#2679](https://github.com/quickwit-oss/tantivy/pull/2679)(@fulmicoton)
 Tantivy 0.24.2
 ================================
 - Fix TopNComputer for reverse order. [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz) 
 Affected queries are [order_by_fast_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_fast_field) and
 [order_by_u64_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_u64_field)
 for `Order::Asc`
 Tantivy 0.24.1
 ================================
 - Fix: bump required rust version to 1.81
 Tantivy 0.24
 ================================
 Tantivy 0.24 will be backwards compatible with indices created with v0.22 and v0.21. The new minimum rust version will be 1.75. Tantivy 0.23 will be skipped.
 #### Bugfixes
 - fix potential endless loop in merge [#2457](https://github.com/quickwit-oss/tantivy/pull/2457)(@PSeitz)
 - fix bug that causes out-of-order sstable key. [#2445](https://github.com/quickwit-oss/tantivy/pull/2445)(@fulmicoton)
 - fix ReferenceValue API flaw [#2372](https://github.com/quickwit-oss/tantivy/pull/2372)(@PSeitz)
 - fix `OwnedBytes` debug panic [#2512](https://github.com/quickwit-oss/tantivy/pull/2512)(@b41sh)
 - catch panics during merges [#2582](https://github.com/quickwit-oss/tantivy/pull/2582)(@rdettai)
 - switch from u32 to usize in bitpacker. This enables multivalued columns larger than 4GB, which crashed during merge before. [#2581](https://github.com/quickwit-oss/tantivy/pull/2581) [#2586](https://github.com/quickwit-oss/tantivy/pull/2586)(@fulmicoton-dd @PSeitz)
 #### Breaking API Changes
 - remove index sorting [#2434](https://github.com/quickwit-oss/tantivy/pull/2434)(@PSeitz)
@@ -54,7 +23,6 @@ Tantivy 0.24 will be backwards compatible with indices created with v0.22 and v0
    - reduce top hits memory consumption [#2426](https://github.com/quickwit-oss/tantivy/pull/2426)(@PSeitz)
    - check unsupported parameters top_hits [#2351](https://github.com/quickwit-oss/tantivy/pull/2351)(@PSeitz)
    - Change AggregationLimits to AggregationLimitsGuard [#2495](https://github.com/quickwit-oss/tantivy/pull/2495)(@PSeitz)
    - add support for counting non integer in aggregation [#2547](https://github.com/quickwit-oss/tantivy/pull/2547)(@trinity-1686a)
 - **Range Queries**
    - Support fast field range queries on json fields [#2456](https://github.com/quickwit-oss/tantivy/pull/2456)(@PSeitz)
    - Add support for str fast field range query [#2460](https://github.com/quickwit-oss/tantivy/pull/2460) [#2452](https://github.com/quickwit-oss/tantivy/pull/2452) [#2453](https://github.com/quickwit-oss/tantivy/pull/2453)(@PSeitz)
@@ -65,20 +33,11 @@ Tantivy 0.24 will be backwards compatible with indices created with v0.22 and v0
 - add columnar format compatibility tests [#2433](https://github.com/quickwit-oss/tantivy/pull/2433)(@PSeitz)
 - Improved snippet ranges algorithm [#2474](https://github.com/quickwit-oss/tantivy/pull/2474)(@gezihuzi)
 - make find_field_with_default return json fields without path [#2476](https://github.com/quickwit-oss/tantivy/pull/2476)(@trinity-1686a)
- Make `BooleanQuery` support `minimum_number_should_match` [#2405](https://github.com/quickwit-oss/tantivy/pull/2405)(@LebranceBW)
+- feat(query): Make `BooleanQuery` support `minimum_number_should_match` [#2405](https://github.com/quickwit-oss/tantivy/pull/2405)(@LebranceBW)
 - Make `NUM_MERGE_THREADS` configurable [#2535](https://github.com/quickwit-oss/tantivy/pull/2535)(@Barre)
- **RegexPhraseQuery** 
+- **Optional Index in Multivalue Columnar Index** For mostly empty multivalued indices there was a large overhead during creation when iterating all docids (merge case). This is alleviated by placing an optional index in the multivalued index to mark documents that have values. This will slightly increase space and access time. [#2439](https://github.com/quickwit-oss/tantivy/pull/2439)(@PSeitz)
 `RegexPhraseQuery` supports phrase queries with regex. E.g. query "b.* b.* wolf" matches "big bad wolf". Slop is supported as well: "b.* wolf"~2 matches "big bad wolf" [#2516](https://github.com/quickwit-oss/tantivy/pull/2516)(@PSeitz)
- **Optional Index in Multivalue Columnar Index** 
+- **Performance/Memory**
 For mostly empty multivalued indices there was a large overhead during creation when iterating all docids (merge case). 
 This is alleviated by placing an optional index in the multivalued index to mark documents that have values. 
 This will slightly increase space and access time. [#2439](https://github.com/quickwit-oss/tantivy/pull/2439)(@PSeitz)
 - **Store DateTime as nanoseconds in doc store** DateTime in the doc store was truncated to microseconds previously. This removes this truncation, while still keeping backwards compatibility. [#2486](https://github.com/quickwit-oss/tantivy/pull/2486)(@PSeitz)
 - **Performance/Memory**
    - lift clauses in LogicalAst for optimized ast during execution [#2449](https://github.com/quickwit-oss/tantivy/pull/2449)(@PSeitz)
    - Use Vec instead of BTreeMap to back OwnedValue object [#2364](https://github.com/quickwit-oss/tantivy/pull/2364)(@fulmicoton)
    - Replace TantivyDocument with CompactDoc. CompactDoc is much smaller and provides similar performance. [#2402](https://github.com/quickwit-oss/tantivy/pull/2402)(@PSeitz)
@@ -92,29 +51,18 @@ This will slightly increase space and access time. [#2439](https://github.com/qu
    - fix de-escaping too much in query parser [#2427](https://github.com/quickwit-oss/tantivy/pull/2427)(@trinity-1686a)
    - improve query parser [#2416](https://github.com/quickwit-oss/tantivy/pull/2416)(@trinity-1686a)
    - Support field grouping `title:(return AND "pink panther")` [#2333](https://github.com/quickwit-oss/tantivy/pull/2333)(@trinity-1686a)
    - allow term starting with wildcard [#2568](https://github.com/quickwit-oss/tantivy/pull/2568)(@trinity-1686a)
 - Exist queries match subpath fields [#2558](https://github.com/quickwit-oss/tantivy/pull/2558)(@rdettai)
 - add access benchmark for columnar [#2432](https://github.com/quickwit-oss/tantivy/pull/2432)(@PSeitz)
 - extend indexwriter proptests [#2342](https://github.com/quickwit-oss/tantivy/pull/2342)(@PSeitz)
 - add bench & test for columnar merging [#2428](https://github.com/quickwit-oss/tantivy/pull/2428)(@PSeitz)
 - Change in Executor API [#2391](https://github.com/quickwit-oss/tantivy/pull/2391)(@fulmicoton)
 - Removed usage of num_cpus [#2387](https://github.com/quickwit-oss/tantivy/pull/2387)(@fulmicoton)
- use bingang for agg and stacker benchmark [#2378](https://github.com/quickwit-oss/tantivy/pull/2378)[#2492](https://github.com/quickwit-oss/tantivy/pull/2492)(@PSeitz) 
+- use binggan for agg benchmark [#2378](https://github.com/quickwit-oss/tantivy/pull/2378)(@PSeitz)
 - cleanup top level exports [#2382](https://github.com/quickwit-oss/tantivy/pull/2382)(@PSeitz)
 - make convert_to_fast_value_and_append_to_json_term pub [#2370](https://github.com/quickwit-oss/tantivy/pull/2370)(@PSeitz)
 - remove JsonTermWriter [#2238](https://github.com/quickwit-oss/tantivy/pull/2238)(@PSeitz)
 - validate sort by field type [#2336](https://github.com/quickwit-oss/tantivy/pull/2336)(@PSeitz)
 - Fix trait bound of StoreReader::iter [#2360](https://github.com/quickwit-oss/tantivy/pull/2360)(@adamreichold)
 - remove read_postings_no_deletes [#2526](https://github.com/quickwit-oss/tantivy/pull/2526)(@PSeitz)
 Tantivy 0.22.1
 ================================
 - Fix TopNComputer for reverse order. [#2672](https://github.com/quickwit-oss/tantivy/pull/2672)(@stuhood @PSeitz) 
 Affected queries are [order_by_fast_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_fast_field) and
 [order_by_u64_field](https://docs.rs/tantivy/latest/tantivy/collector/struct.TopDocs.html#method.order_by_u64_field)
 for `Order::Asc`
 Tantivy 0.22
 ================================
@@ -769,7 +717,7 @@ Tantivy 0.4.0
 - Raise the limit of number of fields (previously 256 fields) (@fulmicoton)
 - Removed u32 fields. They are replaced by u64 and i64 fields (#65) (@fulmicoton)
 - Optimized skip in SegmentPostings (#130) (@lnicola)
- Replacing rustc_serialize by serde. Kudos to  benchmark@KodrAus and @lnicola
+- Replacing rustc_serialize by serde. Kudos to @KodrAus and @lnicola
 - Using error-chain (@KodrAus)
 - QueryParser: (@fulmicoton)
  - Explicit error returned when searched for a term that is not indexed
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,10 +0,0 @@
 cff-version: 1.2.0
 message: "If you use this software, please cite it as below."
 authors:
  - alias: Quickwit Inc.
    website: "https://quickwit.io"
 title: "tantivy"
 version: 0.22.0
 doi: 10.5281/zenodo.13942948
 date-released: 2024-10-17
 url: "https://github.com/quickwit-oss/tantivy"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.26.0"
+version = "0.23.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -11,7 +11,7 @@ repository = "https://github.com/quickwit-oss/tantivy"
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]
 edition = "2021"
-rust-version = "1.85"
+rust-version = "1.66"
 exclude = ["benches/*.json", "benches/*.txt"]
 [dependencies]
@@ -31,20 +31,20 @@ lz4_flex = { version = "0.11", default-features = false, optional = true }
 zstd = { version = "0.13", optional = true, default-features = false }
 tempfile = { version = "3.12.0", optional = true }
 log = "0.4.16"
-serde = { version = "1.0.219", features = ["derive"] }
+serde = { version = "1.0.136", features = ["derive"] }
-serde_json = "1.0.140"
+serde_json = "1.0.79"
-fs4 = { version = "0.13.1", optional = true }
+fs4 = { version = "0.8.0", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
-rust-stemmers = { version = "1.2.0", optional = true }
+rust-stemmers = "1.2.0"
-downcast-rs = "2.0.1"
+downcast-rs = "1.2.1"
 bitpacking = { version = "0.9.2", default-features = false, features = [
    "bitpacker4x",
 ] }
 census = "0.4.2"
-rustc-hash = "2.0.0"
+rustc-hash = "1.1.0"
-thiserror = "2.0.1"
+thiserror = "1.0.30"
 htmlescape = "0.3.1"
 fail = { version = "0.5.0", optional = true }
 time = { version = "0.3.35", features = ["serde-well-known"] }
@@ -52,35 +52,32 @@ smallvec = "1.8.0"
 rayon = "1.5.2"
 lru = "0.12.0"
 fastdivide = "0.4.0"
-itertools = "0.14.0"
+itertools = "0.13.0"
-measure_time = "0.9.0"
+measure_time = "0.8.2"
 arc-swap = "1.5.0"
 bon = "3.3.1"
-columnar = { version = "0.6", path = "./columnar", package = "tantivy-columnar" }
+columnar = { version = "0.3", path = "./columnar", package = "tantivy-columnar" }
-sstable = { version = "0.6", path = "./sstable", package = "tantivy-sstable", optional = true }
+sstable = { version = "0.3", path = "./sstable", package = "tantivy-sstable", optional = true }
-stacker = { version = "0.6", path = "./stacker", package = "tantivy-stacker" }
+stacker = { version = "0.3", path = "./stacker", package = "tantivy-stacker" }
-query-grammar = { version = "0.25.0", path = "./query-grammar", package = "tantivy-query-grammar" }
+query-grammar = { version = "0.22.0", path = "./query-grammar", package = "tantivy-query-grammar" }
-tantivy-bitpacker = { version = "0.9", path = "./bitpacker" }
+tantivy-bitpacker = { version = "0.6", path = "./bitpacker" }
-common = { version = "0.10", path = "./common/", package = "tantivy-common" }
+common = { version = "0.7", path = "./common/", package = "tantivy-common" }
-tokenizer-api = { version = "0.6", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
+tokenizer-api = { version = "0.3", path = "./tokenizer-api", package = "tantivy-tokenizer-api" }
 sketches-ddsketch = { version = "0.3.0", features = ["use_serde"] }
 hyperloglogplus = { version = "0.4.1", features = ["const-loop"] }
 futures-util = { version = "0.3.28", optional = true }
 futures-channel = { version = "0.3.28", optional = true }
 fnv = "1.0.7"
 typetag = "0.2.21"
 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"
 [dev-dependencies]
-binggan = "0.14.2"
+binggan = "0.12.0"
 rand = "0.8.5"
 maplit = "1.0.2"
 matches = "0.1.9"
 pretty_assertions = "1.2.1"
-proptest = "1.7.0"
+proptest = "1.0.0"
 test-log = "0.2.10"
 futures = "0.3.21"
 paste = "1.0.11"
@@ -88,7 +85,7 @@ more-asserts = "0.3.1"
 rand_distr = "0.4.3"
 time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
 postcard = { version = "1.0.4", features = [
-    "use-std",
+  "use-std",
 ], default-features = false }
 [target.'cfg(not(windows))'.dev-dependencies]
@@ -113,21 +110,17 @@ debug-assertions = true
 overflow-checks = true
 [features]
-default = ["mmap", "stopwords", "lz4-compression", "columnar-zstd-compression", "stemmer"]
+default = ["mmap", "stopwords", "lz4-compression"]
 stemmer = ["rust-stemmers"]
 mmap = ["fs4", "tempfile", "memmap2"]
 stopwords = []
 lz4-compression = ["lz4_flex"]
 zstd-compression = ["zstd"]
 # enable zstd-compression in columnar (and sstable)
 columnar-zstd-compression = ["columnar/zstd-compression"]
 failpoints = ["fail", "fail/failpoints"]
 unstable = []                            # useful for benches.
-quickwit = ["sstable", "futures-util", "futures-channel"]
+quickwit = ["sstable", "futures-util"]
 # Compares only the hash of a string when indexing data.
 # Increases indexing speed, but may lead to extremely rare missing terms, when there's a hash collision.
@@ -169,23 +162,3 @@ harness = false
 [[bench]]
 name = "agg_bench"
 harness = false
 [[bench]]
 name = "exists_json"
 harness = false
 [[bench]]
 name = "range_query"
 harness = false
 [[bench]]
 name = "and_or_queries"
 harness = false
 [[bench]]
 name = "range_queries"
 harness = false
 [[bench]]
 name = "bool_queries_with_range"
 harness = false
--- a/README.md
+++ b/README.md
@@ -23,6 +23,8 @@ performance for different types of queries/collections.
 Your mileage WILL vary depending on the nature of queries and their load.
 <img src="doc/assets/images/searchbenchmark.png">
 Details about the benchmark can be found at this [repository](https://github.com/quickwit-oss/search-benchmark-game).
 ## Features
@@ -123,7 +125,6 @@ You can also find other bindings on [GitHub](https://github.com/search?q=tantivy
 - [seshat](https://github.com/matrix-org/seshat/): A matrix message database/indexer
 - [tantiny](https://github.com/baygeldin/tantiny): Tiny full-text search for Ruby
 - [lnx](https://github.com/lnx-search/lnx): adaptable, typo tolerant search engine with a REST API
 - [Bichon](https://github.com/rustmailer/bichon): A lightweight, high-performance Rust email archiver with WebUI
 - and [more](https://github.com/search?q=tantivy)!
 ### On average, how much faster is Tantivy compared to Lucene?
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,4 +1,4 @@
-# Releasing a new Tantivy Version
+# Release a new Tantivy Version
 ## Steps
@@ -10,29 +10,12 @@
 6. Set git tag with new version
-[`cargo-release`](https://github.com/crate-ci/cargo-release) will help us with steps 1-5:
+In conjunction with `cargo-release` Steps 1-4 (I'm not sure if the change detection works):
 Set new packages to version 0.0.0
 Replace prev-tag-name
 ```bash
-cargo release --workspace --no-publish -v --prev-tag-name 0.24 --push-remote origin minor --no-tag
+cargo release --workspace --no-publish -v --prev-tag-name 0.19 --push-remote origin minor --no-tag --execute
 ```
-`no-tag` or it will create tags for all the subpackages
+no-tag or it will create tags for all the subpackages
 cargo release will _not_ ignore unchanged packages, but it will print warnings for them.
 e.g. "warning: updating ownedbytes to 0.10.0 despite no changes made since tag 0.24"
 We need to manually ignore these unchanged packages
 ```bash
 cargo release --workspace --no-publish -v --prev-tag-name 0.24 --push-remote origin minor --no-tag --exclude tokenizer-api
 ```
 Add `--execute` to actually publish the packages, otherwise it will only print the commands that would be run.
 ### Tag Version
 ```bash
 git tag 0.25.0
 git push upstream tag 0.25.0
 ```
--- a/TODO.txt
+++ b/TODO.txt
@@ -10,7 +10,7 @@ rename FastFieldReaders::open to load
 remove fast field reader
 find a way to unify the two DateTime.
-re-add type check in the filter wrapper
+readd type check in the filter wrapper
 add unit test on columnar list columns.
--- a/benches/agg_bench.rs
+++ b/benches/agg_bench.rs
@@ -1,6 +1,5 @@
 use binggan::plugins::PeakMemAllocPlugin;
 use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
 use rand::distributions::WeightedIndex;
 use rand::prelude::SliceRandom;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
@@ -21,6 +20,7 @@ macro_rules! register {
    ($runner:expr, $func:ident) => {
        $runner.register(stringify!($func), move |index| {
            $func(index);
            None
        })
    };
 }
@@ -54,41 +54,26 @@ fn bench_agg(mut group: InputGroup<Index>) {
    register!(group, stats_f64);
    register!(group, extendedstats_f64);
    register!(group, percentiles_f64);
-    register!(group, terms_7);
+    register!(group, terms_few);
-    register!(group, terms_all_unique);
+    register!(group, terms_many);
    register!(group, terms_150_000);
    register!(group, terms_many_top_1000);
    register!(group, terms_many_order_by_term);
    register!(group, terms_many_with_top_hits);
    register!(group, terms_all_unique_with_avg_sub_agg);
    register!(group, terms_many_with_avg_sub_agg);
    register!(group, terms_status_with_avg_sub_agg);
    register!(group, terms_status_with_histogram);
    register!(group, terms_zipf_1000);
    register!(group, terms_zipf_1000_with_histogram);
    register!(group, terms_zipf_1000_with_avg_sub_agg);
    register!(group, terms_many_json_mixed_type_with_avg_sub_agg);
    register!(group, cardinality_agg);
-    register!(group, terms_status_with_cardinality_agg);
+    register!(group, terms_few_with_cardinality_agg);
    register!(group, range_agg);
    register!(group, range_agg_with_avg_sub_agg);
-    register!(group, range_agg_with_term_agg_status);
+    register!(group, range_agg_with_term_agg_few);
    register!(group, range_agg_with_term_agg_many);
    register!(group, histogram);
    register!(group, histogram_hard_bounds);
    register!(group, histogram_with_avg_sub_agg);
    register!(group, histogram_with_term_agg_status);
    register!(group, avg_and_range_with_avg_sub_agg);
    // Filter aggregation benchmarks
    register!(group, filter_agg_all_query_count_agg);
    register!(group, filter_agg_term_query_count_agg);
    register!(group, filter_agg_all_query_with_sub_aggs);
    register!(group, filter_agg_term_query_with_sub_aggs);
    group.run();
 }
@@ -139,12 +124,12 @@ fn extendedstats_f64(index: &Index) {
 }
 fn percentiles_f64(index: &Index) {
    let agg_req = json!({
-        "mypercentiles": {
+      "mypercentiles": {
-            "percentiles": {
+        "percentiles": {
-                "field": "score_f64",
+          "field": "score_f64",
-                "percents": [ 95, 99, 99.9 ]
+          "percents": [ 95, 99, 99.9 ]
            }
        }
      }
    });
    execute_agg(index, agg_req);
 }
@@ -159,10 +144,10 @@ fn cardinality_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
-fn terms_status_with_cardinality_agg(index: &Index) {
+fn terms_few_with_cardinality_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
-            "terms": { "field": "text_few_terms_status" },
+            "terms": { "field": "text_few_terms" },
            "aggs": {
                "cardinality": {
                    "cardinality": {
@@ -175,20 +160,13 @@ fn terms_status_with_cardinality_agg(index: &Index) {
    execute_agg(index, agg_req);
 }
-fn terms_7(index: &Index) {
+fn terms_few(index: &Index) {
    let agg_req = json!({
-        "my_texts": { "terms": { "field": "text_few_terms_status" } },
+        "my_texts": { "terms": { "field": "text_few_terms" } },
    });
    execute_agg(index, agg_req);
 }
-fn terms_all_unique(index: &Index) {
+fn terms_many(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_all_unique_terms" } },
    });
    execute_agg(index, agg_req);
 }
 fn terms_150_000(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_many_terms" } },
    });
@@ -236,72 +214,6 @@ fn terms_many_with_avg_sub_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
 fn terms_all_unique_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_all_unique_terms" },
            "aggs": {
                "average_f64": { "avg": { "field": "score_f64" } }
            }
        },
    });
    execute_agg(index, agg_req);
 }
 fn terms_status_with_histogram(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" },
            "aggs": {
                "histo": {"histogram": { "field": "score_f64", "interval": 10 }}
            }
        }
    });
    execute_agg(index, agg_req);
 }
 fn terms_zipf_1000_with_histogram(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_1000_terms_zipf" },
            "aggs": {
                "histo": {"histogram": { "field": "score_f64", "interval": 10 }}
            }
        }
    });
    execute_agg(index, agg_req);
 }
 fn terms_status_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" },
            "aggs": {
                "average_f64": { "avg": { "field": "score_f64" } }
            }
        },
    });
    execute_agg(index, agg_req);
 }
 fn terms_zipf_1000_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
            "terms": { "field": "text_1000_terms_zipf" },
            "aggs": {
                "average_f64": { "avg": { "field": "score_f64" } }
            }
        },
    });
    execute_agg(index, agg_req);
 }
 fn terms_zipf_1000(index: &Index) {
    let agg_req = json!({
        "my_texts": { "terms": { "field": "text_1000_terms_zipf" } },
    });
    execute_agg(index, agg_req);
 }
 fn terms_many_json_mixed_type_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "my_texts": {
@@ -357,7 +269,7 @@ fn range_agg_with_avg_sub_agg(index: &Index) {
    execute_agg(index, agg_req);
 }
-fn range_agg_with_term_agg_status(index: &Index) {
+fn range_agg_with_term_agg_few(index: &Index) {
    let agg_req = json!({
        "rangef64": {
            "range": {
@@ -372,7 +284,7 @@ fn range_agg_with_term_agg_status(index: &Index) {
                ]
            },
            "aggs": {
-                "my_texts": { "terms": { "field": "text_few_terms_status" } },
+                "my_texts": { "terms": { "field": "text_few_terms" } },
            }
        },
    });
@@ -428,17 +340,6 @@ fn histogram_with_avg_sub_agg(index: &Index) {
    });
    execute_agg(index, agg_req);
 }
 fn histogram_with_term_agg_status(index: &Index) {
    let agg_req = json!({
        "rangef64": {
            "histogram": { "field": "score_f64", "interval": 10 },
            "aggs": {
                "my_texts": { "terms": { "field": "text_few_terms_status" } }
            }
        }
    });
    execute_agg(index, agg_req);
 }
 fn avg_and_range_with_avg_sub_agg(index: &Index) {
    let agg_req = json!({
        "rangef64": {
@@ -478,13 +379,6 @@ fn get_collector(agg_req: Aggregations) -> AggregationCollector {
 }
 fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
    // Flag to use existing index
    let reuse_index = std::env::var("REUSE_AGG_BENCH_INDEX").is_ok();
    if reuse_index && std::path::Path::new("agg_bench").exists() {
        return Index::open_in_dir("agg_bench");
    }
    // crreate dir
    std::fs::create_dir_all("agg_bench")?;
    let mut schema_builder = Schema::builder();
    let text_fieldtype = tantivy::schema::TextOptions::default()
        .set_indexing_options(
@@ -493,47 +387,20 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
        .set_stored();
    let text_field = schema_builder.add_text_field("text", text_fieldtype);
    let json_field = schema_builder.add_json_field("json", FAST);
    let text_field_all_unique_terms =
        schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
    let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
-    let text_field_few_terms_status =
+    let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
        schema_builder.add_text_field("text_few_terms_status", STRING | FAST);
    let text_field_1000_terms_zipf =
        schema_builder.add_text_field("text_1000_terms_zipf", STRING | FAST);
    let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
    let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
    let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
    let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
-    // use tmp dir
+    let index = Index::create_from_tempdir(schema_builder.build())?;
-    let index = if reuse_index {
+    let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
        Index::create_in_dir("agg_bench", schema_builder.build())?
    } else {
        Index::create_from_tempdir(schema_builder.build())?
    };
    // Approximate log proportions
    let status_field_data = [
        ("INFO", 8000),
        ("ERROR", 300),
        ("WARN", 1200),
        ("DEBUG", 500),
        ("OK", 500),
        ("CRITICAL", 20),
        ("EMERGENCY", 1),
    ];
    let log_level_distribution =
        WeightedIndex::new(status_field_data.iter().map(|item| item.1)).unwrap();
    let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
    let many_terms_data = (0..150_000)
        .map(|num| format!("author{num}"))
        .collect::<Vec<_>>();
    // Prepare 1000 unique terms sampled using a Zipf distribution.
    // Exponent ~1.1 approximates top-20 terms covering around ~20%.
    let terms_1000: Vec<String> = (1..=1000).map(|i| format!("term_{i}")).collect();
    let zipf_1000 = rand_distr::Zipf::new(1000, 1.1f64).unwrap();
    {
        let mut rng = StdRng::from_seed([1u8; 32]);
        let mut index_writer = index.writer_with_num_threads(1, 200_000_000)?;
@@ -543,25 +410,15 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            index_writer.add_document(doc!())?;
        }
        if cardinality == Cardinality::Multivalued {
            let log_level_sample_a = status_field_data[log_level_distribution.sample(&mut rng)].0;
            let log_level_sample_b = status_field_data[log_level_distribution.sample(&mut rng)].0;
            let idx_a = zipf_1000.sample(&mut rng) as usize - 1;
            let idx_b = zipf_1000.sample(&mut rng) as usize - 1;
            let term_1000_a = &terms_1000[idx_a];
            let term_1000_b = &terms_1000[idx_b];
            index_writer.add_document(doc!(
                json_field => json!({"mixed_type": 10.0}),
                json_field => json!({"mixed_type": 10.0}),
                text_field => "cool",
                text_field => "cool",
                text_field_all_unique_terms => "cool",
                text_field_all_unique_terms => "coolo",
                text_field_many_terms => "cool",
                text_field_many_terms => "cool",
-                text_field_few_terms_status => log_level_sample_a,
+                text_field_few_terms => "cool",
-                text_field_few_terms_status => log_level_sample_b,
+                text_field_few_terms => "cool",
                text_field_1000_terms_zipf => term_1000_a.as_str(),
                text_field_1000_terms_zipf => term_1000_b.as_str(),
                score_field => 1u64,
                score_field => 1u64,
                score_field_f64 => lg_norm.sample(&mut rng),
@@ -586,10 +443,8 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
            index_writer.add_document(doc!(
                text_field => "cool",
                json_field => json,
                text_field_all_unique_terms => format!("unique_term_{}", rng.gen::<u64>()),
                text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
-                text_field_few_terms_status => status_field_data[log_level_distribution.sample(&mut rng)].0,
+                text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
                text_field_1000_terms_zipf => terms_1000[zipf_1000.sample(&mut rng) as usize - 1].as_str(),
                score_field => val as u64,
                score_field_f64 => lg_norm.sample(&mut rng),
                score_field_i64 => val as i64,
@@ -606,61 +461,3 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
    Ok(index)
 }
 // Filter aggregation benchmarks
 /// Runs a filter aggregation whose filter is the match-all query `*`,
 /// with a single `value_count` sub-aggregation on the `score` field.
 fn filter_agg_all_query_count_agg(index: &Index) {
    let count_agg = json!({ "value_count": { "field": "score" } });
    let request = json!({
        "filtered": {
            "filter": "*",
            "aggs": { "count": count_agg }
        }
    });
    execute_agg(index, request);
 }
 /// Runs a filter aggregation whose filter is the term query `text:cool`,
 /// with a single `value_count` sub-aggregation on the `score` field.
 fn filter_agg_term_query_count_agg(index: &Index) {
    let count_agg = json!({ "value_count": { "field": "score" } });
    let request = json!({
        "filtered": {
            "filter": "text:cool",
            "aggs": { "count": count_agg }
        }
    });
    execute_agg(index, request);
 }
 /// Runs a match-all (`*`) filter aggregation carrying three sub-aggregations:
 /// `avg` on `score`, `stats` on `score_f64` and `terms` on
 /// `text_few_terms_status`.
 fn filter_agg_all_query_with_sub_aggs(index: &Index) {
    let sub_aggs = json!({
        "avg_score": { "avg": { "field": "score" } },
        "stats_score": { "stats": { "field": "score_f64" } },
        "terms_text": { "terms": { "field": "text_few_terms_status" } }
    });
    let request = json!({
        "filtered": { "filter": "*", "aggs": sub_aggs }
    });
    execute_agg(index, request);
 }
 /// Runs a `text:cool` filter aggregation carrying three sub-aggregations:
 /// `avg` on `score`, `stats` on `score_f64` and `terms` on
 /// `text_few_terms_status`.
 fn filter_agg_term_query_with_sub_aggs(index: &Index) {
    let sub_aggs = json!({
        "avg_score": { "avg": { "field": "score" } },
        "stats_score": { "stats": { "field": "score_f64" } },
        "terms_text": { "terms": { "field": "text_few_terms_status" } }
    });
    let request = json!({
        "filtered": { "filter": "text:cool", "aggs": sub_aggs }
    });
    execute_agg(index, request);
 }
--- a/benches/and_or_queries.rs
+++ b/benches/and_or_queries.rs
@@ -1,218 +0,0 @@
 // Benchmarks boolean conjunction queries using binggan.
 //
 // What’s measured:
 // - Or and And queries with varying selectivity (only `Term` queries for now on leafs)
 // - Nested AND/OR combinations (on multiple fields)
 // - No-scoring path using the Count collector (focus on iterator/skip performance)
 // - Top-K retrieval (k=10) using the TopDocs collector
 //
 // Corpus model:
 // - Synthetic docs; each token a/b/c is independently included per doc
 // - If none of a/b/c are included, emit a neutral filler token to keep doc length similar
 //
 // Notes:
 // - After optimization, when scoring is disabled Tantivy reads doc-only postings
 //   (IndexRecordOption::Basic), avoiding frequency decoding overhead.
 // - This bench isolates boolean iteration speed and intersection/union cost.
 // - Use `cargo bench --bench and_or_queries` to run.
 use binggan::{black_box, BenchGroup, BenchRunner};
 use rand::prelude::*;
 use rand::rngs::StdRng;
 use rand::SeedableRng;
 use tantivy::collector::sort_key::SortByStaticFastValue;
 use tantivy::collector::{Collector, Count, TopDocs};
 use tantivy::query::{Query, QueryParser};
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{doc, Index, Order, ReloadPolicy, Searcher};
 /// One benchmark "view" of an index: the index handle, a searcher over it,
 /// and the query parser used to turn query strings into queries.
 #[derive(Clone)]
 struct BenchIndex {
    // Kept alongside the searcher; no code in this file reads it back,
    // hence the `dead_code` allowance.
    #[allow(dead_code)]
    index: Index,
    // Searcher cloned into every registered benchmark task.
    searcher: Searcher,
    // Parser whose default fields distinguish the views.
    query_parser: QueryParser,
 }
 /// Build a single index containing both fields (title, body) and
 /// return two BenchIndex views:
 /// - single_field: QueryParser defaults to only "body"
 /// - multi_field:  QueryParser defaults to ["title", "body"]
 ///
 /// `num_docs` is the number of documents to generate; `p_a`, `p_b` and
 /// `p_c` are the independent probabilities that a document contains the
 /// token "a", "b" and "c" respectively. Both returned views share the same
 /// index and searcher and differ only in their query parser.
 ///
 /// Panics if the writer, the commit or the reader cannot be created.
 fn build_shared_indices(num_docs: usize, p_a: f32, p_b: f32, p_c: f32) -> (BenchIndex, BenchIndex) {
    // Unified schema: two text fields plus two u64 fast fields used for sorting.
    let mut schema_builder = Schema::builder();
    let f_title = schema_builder.add_text_field("title", TEXT);
    let f_body = schema_builder.add_text_field("body", TEXT);
    let f_score = schema_builder.add_u64_field("score", FAST);
    let f_score2 = schema_builder.add_u64_field("score2", FAST);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema.clone());
    // Populate index with stable RNG for reproducibility.
    // NOTE: the corpus depends on the exact order of the RNG draws below.
    let mut rng = StdRng::from_seed([7u8; 32]);
    // Populate: spread each present token 90/10 to body/title
    {
        let mut writer = index.writer_with_num_threads(1, 500_000_000).unwrap();
        for _ in 0..num_docs {
            // Decide token presence first, then draw the two sort values.
            let has_a = rng.gen_bool(p_a as f64);
            let has_b = rng.gen_bool(p_b as f64);
            let has_c = rng.gen_bool(p_c as f64);
            let score = rng.gen_range(0u64..100u64);
            let score2 = rng.gen_range(0u64..100_000u64);
            let mut title_tokens: Vec<&str> = Vec::new();
            let mut body_tokens: Vec<&str> = Vec::new();
            // Each present token lands in the title with probability 0.1,
            // otherwise in the body.
            if has_a {
                if rng.gen_bool(0.1) {
                    title_tokens.push("a");
                } else {
                    body_tokens.push("a");
                }
            }
            if has_b {
                if rng.gen_bool(0.1) {
                    title_tokens.push("b");
                } else {
                    body_tokens.push("b");
                }
            }
            if has_c {
                if rng.gen_bool(0.1) {
                    title_tokens.push("c");
                } else {
                    body_tokens.push("c");
                }
            }
            // No a/b/c at all: emit the filler token "z" into the body.
            if title_tokens.is_empty() && body_tokens.is_empty() {
                body_tokens.push("z");
            }
            writer
                .add_document(doc!(
                    f_title=>title_tokens.join(" "),
                    f_body=>body_tokens.join(" "),
                    f_score=>score,
                    f_score2=>score2,
                ))
                .unwrap();
        }
        // Single commit after all documents have been added.
        writer.commit().unwrap();
    }
    // Prepare reader/searcher once.
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()
        .unwrap();
    let searcher = reader.searcher();
    // Build two query parsers with different default fields.
    let qp_single = QueryParser::for_index(&index, vec![f_body]);
    let qp_multi = QueryParser::for_index(&index, vec![f_title, f_body]);
    let single_view = BenchIndex {
        index: index.clone(),
        searcher: searcher.clone(),
        query_parser: qp_single,
    };
    let multi_view = BenchIndex {
        index,
        searcher,
        query_parser: qp_multi,
    };
    (single_view, multi_view)
 }
 /// Builds one corpus per scenario and, for each of its two views
 /// (single-field and multi-field default parser), registers every query
 /// with four collectors before running the group.
 fn main() {
    // Scenario tuple: (label, number of docs, p(a), p(b), p(c)).
    let scenarios = vec![
        (
            "N=1M, p(a)=5%, p(b)=1%, p(c)=15%".to_string(),
            1_000_000,
            0.05,
            0.01,
            0.15,
        ),
        (
            "N=1M, p(a)=1%, p(b)=1%, p(c)=15%".to_string(),
            1_000_000,
            0.01,
            0.01,
            0.15,
        ),
    ];
    let queries = ["a", "+a +b", "+a +b +c", "a OR b", "a OR b OR c"];
    let mut runner = BenchRunner::new();
    for (label, num_docs, p_a, p_b, p_c) in scenarios {
        let (single_view, multi_view) = build_shared_indices(num_docs, p_a, p_b, p_c);
        let views = [("single_field", single_view), ("multi_field", multi_view)];
        for (view_name, bench_index) in views {
            // One benchmark group per (view, scenario) pair.
            let mut group = runner.new_group();
            group.set_name(format!("{} — {}", view_name, label));
            for query_str in queries {
                // Count only: no scoring, no top-k bookkeeping.
                add_bench_task(&mut group, &bench_index, query_str, Count, "count");
                // Top 10 ordered by relevance score.
                let by_score = TopDocs::with_limit(10).order_by_score();
                add_bench_task(&mut group, &bench_index, query_str, by_score, "top10");
                // Top 10 ordered by a single fast field.
                let by_fast_field =
                    TopDocs::with_limit(10).order_by_fast_field::<u64>("score", Order::Asc);
                add_bench_task(
                    &mut group,
                    &bench_index,
                    query_str,
                    by_fast_field,
                    "top10_by_ff",
                );
                // Top 10 ordered by a pair of fast fields.
                let by_two_fast_fields = TopDocs::with_limit(10).order_by((
                    SortByStaticFastValue::<u64>::for_field("score"),
                    SortByStaticFastValue::<u64>::for_field("score2"),
                ));
                add_bench_task(
                    &mut group,
                    &bench_index,
                    query_str,
                    by_two_fast_fields,
                    "top10_by_2ff",
                );
            }
            group.run();
        }
    }
 }
 /// Parses `query_str` with the view's query parser and registers a task on
 /// `bench_group` that runs it against the view's searcher with `collector`.
 ///
 /// The task name is the query string with spaces replaced by underscores,
 /// followed by `_` and `collector_name`. Panics if the query fails to parse.
 fn add_bench_task<C: Collector + 'static>(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query_str: &str,
    collector: C,
    collector_name: &str,
 ) {
    let sanitized_query = query_str.replace(" ", "_");
    let search_task = SearchTask {
        searcher: bench_index.searcher.clone(),
        collector,
        query: bench_index.query_parser.parse_query(query_str).unwrap(),
    };
    bench_group.register(
        format!("{}_{}", sanitized_query, collector_name),
        move |_| black_box(search_task.run()),
    );
 }
 /// A ready-to-run search: searcher, collector and parsed query.
 struct SearchTask<C: Collector> {
    searcher: Searcher,
    collector: C,
    query: Box<dyn Query>,
 }
 impl<C: Collector> SearchTask<C> {
    /// Executes the search and discards the collected result.
    ///
    /// Always returns `1`. Panics if the search fails.
    #[inline(never)]
    pub fn run(&self) -> usize {
        let Self {
            searcher,
            collector,
            query,
        } = self;
        searcher.search(query, collector).unwrap();
        1
    }
 }
--- a/benches/bool_queries_with_range.rs
+++ b/benches/bool_queries_with_range.rs
@@ -1,288 +0,0 @@
 use binggan::{black_box, BenchGroup, BenchRunner};
 use rand::prelude::*;
 use rand::rngs::StdRng;
 use rand::SeedableRng;
 use tantivy::collector::{Collector, Count, DocSetCollector, TopDocs};
 use tantivy::query::{Query, QueryParser};
 use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
 use tantivy::{doc, Index, Order, ReloadPolicy, Searcher};
 /// The index under benchmark together with a searcher over it and the
 /// query parser used to parse the benchmark query strings.
 #[derive(Clone)]
 struct BenchIndex {
    // Stored with the searcher; marked `dead_code` because it is not read back.
    #[allow(dead_code)]
    index: Index,
    // Searcher cloned into every registered benchmark task.
    searcher: Searcher,
    // Parser used by `add_bench_task` to parse query strings.
    query_parser: QueryParser,
 }
 /// Builds an in-RAM index of `num_docs` documents and returns it with a
 /// searcher and a query parser defaulting to the `title` field.
 ///
 /// Every document gets the title token "a" with probability `p_title_a`
 /// (otherwise "b"), a random number and an ascending number; both numbers
 /// are written to an indexed-only field and to an indexed fast field.
 ///
 /// `distribution` selects the value layout:
 /// - `"dense"`: random values in `0..1000`, ascending value `doc_id / 10000`
 /// - `"sparse"`: random values in `0..10000000`, ascending value `doc_id`
 ///
 /// Panics on any other `distribution` and on writer/commit/reader failures.
 fn build_shared_indices(num_docs: usize, p_title_a: f32, distribution: &str) -> BenchIndex {
    let mut schema_builder = Schema::builder();
    let f_title = schema_builder.add_text_field("title", TEXT);
    let f_num_rand = schema_builder.add_u64_field("num_rand", INDEXED);
    let f_num_asc = schema_builder.add_u64_field("num_asc", INDEXED);
    let f_num_rand_fast = schema_builder.add_u64_field("num_rand_fast", INDEXED | FAST);
    let f_num_asc_fast = schema_builder.add_u64_field("num_asc_fast", INDEXED | FAST);
    let index = Index::create_in_ram(schema_builder.build());
    // Fixed seed so that every run indexes the same corpus.
    let mut rng = StdRng::from_seed([7u8; 32]);
    {
        let mut writer = index.writer_with_num_threads(1, 4_000_000_000).unwrap();
        // (exclusive upper bound for num_rand, divisor turning doc_id into num_asc)
        let (rand_upper, asc_divisor): (u64, usize) = match distribution {
            "dense" => (1000, 10000),
            "sparse" => (10000000, 1),
            _ => panic!("Unsupported distribution type"),
        };
        for doc_id in 0..num_docs {
            // A title is always present, so no document is empty.
            let title_token = if rng.gen_bool(p_title_a as f64) {
                "a"
            } else {
                "b"
            };
            let num_rand = rng.gen_range(0u64..rand_upper);
            let num_asc = (doc_id / asc_divisor) as u64;
            let document = doc!(
                f_title=>title_token,
                f_num_rand=>num_rand,
                f_num_asc=>num_asc,
                f_num_rand_fast=>num_rand,
                f_num_asc_fast=>num_asc,
            );
            writer.add_document(document).unwrap();
        }
        writer.commit().unwrap();
    }
    // The reader never reloads on its own; the index is final after the commit.
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()
        .unwrap();
    let searcher = reader.searcher();
    let query_parser = QueryParser::for_index(&index, vec![f_title]);
    BenchIndex {
        index,
        searcher,
        query_parser,
    }
 }
 /// For each scenario, builds the index, derives every
 /// `<term> AND <field>:[low TO high]` query plus one query combining two
 /// fast-field ranges, registers the benchmark tasks and runs the group.
 fn main() {
    // Scenario tuple:
    // (group name, number of docs, p(title = "a"), distribution, range low, range high).
    let scenarios = vec![
        (
            "dense and 99% a".to_string(),
            10_000_000,
            0.99,
            "dense",
            0,
            9,
        ),
        (
            "dense and 99% a".to_string(),
            10_000_000,
            0.99,
            "dense",
            990,
            999,
        ),
        (
            "sparse and 99% a".to_string(),
            10_000_000,
            0.99,
            "sparse",
            0,
            9,
        ),
        (
            "sparse and 99% a".to_string(),
            10_000_000,
            0.99,
            "sparse",
            9_999_990,
            9_999_999,
        ),
    ];
    // The four numeric fields and the three title terms that get combined.
    let field_names = ["num_rand", "num_asc", "num_rand_fast", "num_asc_fast"];
    let terms = ["a", "b", "z"];
    let mut runner = BenchRunner::new();
    for (scenario_id, n, p_title_a, num_rand_distribution, range_low, range_high) in scenarios {
        let bench_index = build_shared_indices(n, p_title_a, num_rand_distribution);
        let mut group = runner.new_group();
        group.set_name(scenario_id);
        // Term-major, field-minor: same ordering as nested loops over terms then fields.
        let mut queries: Vec<(String, String)> = terms
            .iter()
            .flat_map(|term| {
                field_names.iter().map(move |field_name| {
                    let query_str = format!(
                        "{} AND {}:[{} TO {}]",
                        term, field_name, range_low, range_high
                    );
                    (query_str, field_name.to_string())
                })
            })
            .collect();
        // One extra query intersecting the two fast-field ranges.
        queries.push((
            format!(
                "{}:[{} TO {}] AND {}:[{} TO {}]",
                "num_rand_fast", range_low, range_high, "num_asc_fast", range_low, range_high
            ),
            "num_asc_fast".to_string(),
        ));
        for (query_str, field_name) in &queries {
            run_benchmark_tasks(&mut group, &bench_index, query_str, field_name);
        }
        group.run();
    }
 }
 /// Registers every benchmark task for one query string.
 ///
 /// A count task and an "all results" task are always registered. When
 /// `field_name` ends with `_fast`, two top-100 tasks ordered by that field
 /// (ascending, then descending) are registered as well.
 fn run_benchmark_tasks(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query_str: &str,
    field_name: &str,
 ) {
    add_bench_task(bench_group, bench_index, query_str, Count, "count");
    add_bench_task(
        bench_group,
        bench_index,
        query_str,
        DocSetCollector,
        "all results",
    );
    // Ordering by a field is only attempted for the `_fast` fields.
    if !field_name.ends_with("_fast") {
        return;
    }
    for (order, direction) in [(Order::Asc, "asc"), (Order::Desc, "desc")] {
        let collector_name = format!("top100_by_{}_{}", field_name, direction);
        let top_docs =
            TopDocs::with_limit(100).order_by_fast_field::<u64>(field_name.to_string(), order);
        add_bench_task(
            bench_group,
            bench_index,
            query_str,
            top_docs,
            &collector_name,
        );
    }
 }
 /// Parses `query_str` and registers a task on `bench_group` that runs it
 /// with `collector` against the shared searcher.
 ///
 /// The task name is the query string with spaces replaced by underscores,
 /// followed by `_` and `collector_name`. Panics if the query fails to parse.
 fn add_bench_task<C: Collector + 'static>(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query_str: &str,
    collector: C,
    collector_name: &str,
 ) {
    let sanitized_query = query_str.replace(" ", "_");
    let search_task = SearchTask {
        searcher: bench_index.searcher.clone(),
        collector,
        query: bench_index.query_parser.parse_query(query_str).unwrap(),
    };
    bench_group.register(
        format!("{}_{}", sanitized_query, collector_name),
        move |_| black_box(search_task.run()),
    );
 }
 /// A ready-to-run search: searcher, collector and parsed query.
 struct SearchTask<C: Collector> {
    searcher: Searcher,
    collector: C,
    query: Box<dyn Query>,
 }
 impl<C: Collector> SearchTask<C> {
    /// Executes the search and returns the size of the collected result.
    ///
    /// The collector's result type is probed at runtime: a `usize` is
    /// returned as is; the two top-docs vector shapes and the doc-address
    /// set return their length. Any other type is reported on stderr and
    /// yields `0`. Panics if the search fails.
    #[inline(never)]
    pub fn run(&self) -> usize {
        let fruit = self.searcher.search(&self.query, &self.collector).unwrap();
        // Type-erase once so that each candidate type can be tried in turn.
        let erased: &dyn std::any::Any = &fruit;
        if let Some(count) = erased.downcast_ref::<usize>() {
            return *count;
        }
        if let Some(top_docs) = erased.downcast_ref::<Vec<(Option<u64>, tantivy::DocAddress)>>() {
            return top_docs.len();
        }
        if let Some(top_docs) = erased.downcast_ref::<Vec<(u64, tantivy::DocAddress)>>() {
            return top_docs.len();
        }
        if let Some(doc_set) =
            erased.downcast_ref::<std::collections::HashSet<tantivy::DocAddress>>()
        {
            return doc_set.len();
        }
        eprintln!(
            "Unknown collector result type: {:?}",
            std::any::type_name::<C::Fruit>()
        );
        0
    }
 }
--- a/benches/exists_json.rs
+++ b/benches/exists_json.rs
@@ -1,69 +0,0 @@
 use binggan::plugins::PeakMemAllocPlugin;
 use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
 use serde_json::json;
 use tantivy::collector::Count;
 use tantivy::query::ExistsQuery;
 use tantivy::schema::{Schema, FAST, TEXT};
 use tantivy::{doc, Index};
 #[global_allocator]
 pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
 /// Builds one index per JSON sub-field count and benchmarks the exists
 /// query on each, with peak-memory tracking and a single iteration.
 fn main() {
    let doc_count: usize = 500_000;
    let subfield_counts = [1usize, 2, 3, 4, 5, 6, 7, 8, 16, 256, 4096, 65536, 262144];
    // One named input per sub-field count.
    let mut indices: Vec<(String, Index)> = Vec::with_capacity(subfield_counts.len());
    for sub_fields in subfield_counts {
        let label = format!("subfields={sub_fields}");
        let index = build_index_with_json_subfields(doc_count, sub_fields);
        indices.push((label, index));
    }
    let mut group = InputGroup::new_with_inputs(indices);
    group.add_plugin(PeakMemAllocPlugin::new(GLOBAL));
    // Run each benchmark exactly once.
    group.config().num_iter_group = Some(1);
    group.config().num_iter_bench = Some(1);
    group.register("exists_json", exists_json_union);
    group.run();
 }
 /// Counts the documents matched by an `ExistsQuery` on the `json` field.
 ///
 /// Panics if the reader cannot be opened or the search fails.
 fn exists_json_union(index: &Index) {
    let reader = index.reader().expect("reader");
    let searcher = reader.searcher();
    let exists_query = ExistsQuery::new("json".to_string(), true);
    let num_hits = searcher
        .search(&exists_query, &Count)
        .expect("exists search");
    // Keep the result observable so the search cannot be optimized away.
    black_box(num_hits);
 }
 /// Builds a temp-dir index of `num_docs` documents, each holding one JSON
 /// object with a single key `field_<i % num_subfields>` mapped to `i`.
 ///
 /// Panics on index/writer/commit failures (and if `num_subfields` is 0,
 /// because of the modulo).
 fn build_index_with_json_subfields(num_docs: usize, num_subfields: usize) -> Index {
    // A single JSON field, TEXT | FAST, named "json".
    let mut schema_builder = Schema::builder();
    let json_field = schema_builder.add_json_field("json", TEXT | FAST);
    let index = Index::create_from_tempdir(schema_builder.build()).expect("create index");
    {
        let mut index_writer = index
            .writer_with_num_threads(1, 200_000_000)
            .expect("writer");
        for i in 0..num_docs {
            // Rotate through the sub-paths: each document sets exactly one,
            // so no single sub-path covers all documents but their union does.
            let sub = i % num_subfields;
            let payload = json!({ format!("field_{sub}"): i as u64 });
            index_writer
                .add_document(doc!(json_field => payload))
                .expect("add_document");
        }
        index_writer.commit().expect("commit");
    }
    index
 }
--- a/benches/range_queries.rs
+++ b/benches/range_queries.rs
@@ -1,365 +0,0 @@
 use std::ops::Bound;
 use binggan::{black_box, BenchGroup, BenchRunner};
 use rand::prelude::*;
 use rand::rngs::StdRng;
 use rand::SeedableRng;
 use tantivy::collector::{Count, DocSetCollector, TopDocs};
 use tantivy::query::RangeQuery;
 use tantivy::schema::{Schema, FAST, INDEXED};
 use tantivy::{doc, Index, Order, ReloadPolicy, Searcher, Term};
 /// The index under benchmark together with a searcher over it.
 #[derive(Clone)]
 struct BenchIndex {
    // Stored with the searcher; marked `dead_code` because it is not read back.
    #[allow(dead_code)]
    index: Index,
    // Searcher cloned into every registered benchmark task.
    searcher: Searcher,
 }
 /// Builds an in-RAM index of `num_docs` documents with two indexed fast
 /// u64 fields and returns it together with a searcher.
 ///
 /// `distribution` selects the value layout:
 /// - `"dense"`: random values in `0..1000`, ascending value `doc_id / 10000`
 /// - `"sparse"`: random values in `0..10000000`, ascending value `doc_id`
 ///
 /// Panics on any other `distribution` and on writer/commit/reader failures.
 fn build_shared_indices(num_docs: usize, distribution: &str) -> BenchIndex {
    let mut schema_builder = Schema::builder();
    let f_num_rand_fast = schema_builder.add_u64_field("num_rand_fast", INDEXED | FAST);
    let f_num_asc_fast = schema_builder.add_u64_field("num_asc_fast", INDEXED | FAST);
    let index = Index::create_in_ram(schema_builder.build());
    // Fixed seed so that every run indexes the same corpus.
    let mut rng = StdRng::from_seed([7u8; 32]);
    {
        let mut writer = index.writer_with_num_threads(1, 4_000_000_000).unwrap();
        // (exclusive upper bound for num_rand, divisor turning doc_id into num_asc)
        let (rand_upper, asc_divisor): (u64, usize) = match distribution {
            "dense" => (1000, 10000),
            "sparse" => (10000000, 1),
            _ => panic!("Unsupported distribution type"),
        };
        for doc_id in 0..num_docs {
            let num_rand = rng.gen_range(0u64..rand_upper);
            let num_asc = (doc_id / asc_divisor) as u64;
            let document = doc!(
                f_num_rand_fast=>num_rand,
                f_num_asc_fast=>num_asc,
            );
            writer.add_document(document).unwrap();
        }
        writer.commit().unwrap();
    }
    // The reader never reloads on its own; the index is final after the commit.
    let reader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()
        .unwrap();
    let searcher = reader.searcher();
    BenchIndex { index, searcher }
 }
 /// For each scenario, builds the index and registers the range-query
 /// benchmark tasks for both fast fields, then runs the group.
 fn main() {
    // Scenario tuple: (group name, number of docs, distribution, range low, range high).
    let scenarios = vec![
        // Dense distribution: random values fall in 0..1000.
        (
            "dense_values_search_low_value_range".to_string(),
            10_000_000,
            "dense",
            0,
            9,
        ),
        (
            "dense_values_search_high_value_range".to_string(),
            10_000_000,
            "dense",
            990,
            999,
        ),
        (
            "dense_values_search_out_of_range".to_string(),
            10_000_000,
            "dense",
            1000,
            1002,
        ),
        // Sparse distribution: random values fall in 0..10000000.
        (
            "sparse_values_search_low_value_range".to_string(),
            10_000_000,
            "sparse",
            0,
            9,
        ),
        (
            "sparse_values_search_high_value_range".to_string(),
            10_000_000,
            "sparse",
            9_999_990,
            9_999_999,
        ),
        (
            "sparse_values_search_out_of_range".to_string(),
            10_000_000,
            "sparse",
            10_000_000,
            10_000_002,
        ),
    ];
    let field_names = ["num_rand_fast", "num_asc_fast"];
    let mut runner = BenchRunner::new();
    for (scenario_id, n, num_rand_distribution, range_low, range_high) in scenarios {
        let bench_index = build_shared_indices(n, num_rand_distribution);
        let mut group = runner.new_group();
        group.set_name(scenario_id);
        for field_name in field_names {
            // Inclusive range [range_low, range_high] on the current field.
            let field = bench_index.searcher.schema().get_field(field_name).unwrap();
            let lower = Bound::Included(Term::from_field_u64(field, range_low));
            let upper = Bound::Included(Term::from_field_u64(field, range_high));
            run_benchmark_tasks(
                &mut group,
                &bench_index,
                RangeQuery::new(lower, upper),
                field_name,
                range_low,
                range_high,
            );
        }
        group.run();
    }
 }
 /// Registers the benchmark tasks for one range query on `field_name`:
 /// a count task, then top-100 ordered by the field ascending, then
 /// top-100 ordered by the field descending.
 fn run_benchmark_tasks(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query: RangeQuery,
    field_name: &str,
    range_low: u64,
    range_high: u64,
 ) {
    add_bench_task_count(
        bench_group,
        bench_index,
        query.clone(),
        "count",
        field_name,
        range_low,
        range_high,
    );
    let asc_name = format!("top100_by_{}_asc", field_name);
    add_bench_task_top100_asc(
        bench_group,
        bench_index,
        query.clone(),
        &asc_name,
        field_name,
        range_low,
        range_high,
        field_name.to_string(),
    );
    // Last user of the query, so it is moved instead of cloned.
    let desc_name = format!("top100_by_{}_desc", field_name);
    add_bench_task_top100_desc(
        bench_group,
        bench_index,
        query,
        &desc_name,
        field_name,
        range_low,
        range_high,
        field_name.to_string(),
    );
 }
 /// Registers a task that counts the documents matching `query`.
 ///
 /// The task is named `range_<field>_[<low> TO <high>]_<collector_name>`.
 fn add_bench_task_count(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query: RangeQuery,
    collector_name: &str,
    field_name: &str,
    range_low: u64,
    range_high: u64,
 ) {
    let search_task = CountSearchTask {
        searcher: bench_index.searcher.clone(),
        query,
    };
    bench_group.register(
        format!(
            "range_{}_[{} TO {}]_{}",
            field_name, range_low, range_high, collector_name
        ),
        move |_| black_box(search_task.run()),
    );
 }
 /// Registers a task that collects the full doc set matching `query`.
 ///
 /// The task is named `range_<field>_[<low> TO <high>]_<collector_name>`.
 fn add_bench_task_docset(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query: RangeQuery,
    collector_name: &str,
    field_name: &str,
    range_low: u64,
    range_high: u64,
 ) {
    let search_task = DocSetSearchTask {
        searcher: bench_index.searcher.clone(),
        query,
    };
    bench_group.register(
        format!(
            "range_{}_[{} TO {}]_{}",
            field_name, range_low, range_high, collector_name
        ),
        move |_| black_box(search_task.run()),
    );
 }
 /// Registers a task that fetches the top 100 matches of `query` ordered
 /// ascending by the fast field `field_name_owned`.
 ///
 /// The task is named `range_<field>_[<low> TO <high>]_<collector_name>`.
 fn add_bench_task_top100_asc(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query: RangeQuery,
    collector_name: &str,
    field_name: &str,
    range_low: u64,
    range_high: u64,
    field_name_owned: String,
 ) {
    let search_task = Top100AscSearchTask {
        searcher: bench_index.searcher.clone(),
        query,
        field_name: field_name_owned,
    };
    bench_group.register(
        format!(
            "range_{}_[{} TO {}]_{}",
            field_name, range_low, range_high, collector_name
        ),
        move |_| black_box(search_task.run()),
    );
 }
 /// Registers a task that fetches the top 100 matches of `query` ordered
 /// descending by the fast field `field_name_owned`.
 ///
 /// The task is named `range_<field>_[<low> TO <high>]_<collector_name>`.
 fn add_bench_task_top100_desc(
    bench_group: &mut BenchGroup,
    bench_index: &BenchIndex,
    query: RangeQuery,
    collector_name: &str,
    field_name: &str,
    range_low: u64,
    range_high: u64,
    field_name_owned: String,
 ) {
    let search_task = Top100DescSearchTask {
        searcher: bench_index.searcher.clone(),
        query,
        field_name: field_name_owned,
    };
    bench_group.register(
        format!(
            "range_{}_[{} TO {}]_{}",
            field_name, range_low, range_high, collector_name
        ),
        move |_| black_box(search_task.run()),
    );
 }
 /// A range query paired with the searcher it is counted against.
 struct CountSearchTask {
    searcher: Searcher,
    query: RangeQuery,
 }
 impl CountSearchTask {
    /// Returns the number of documents matching the query.
    /// Panics if the search fails.
    #[inline(never)]
    pub fn run(&self) -> usize {
        let num_hits: usize = self.searcher.search(&self.query, &Count).unwrap();
        num_hits
    }
 }
 /// A range query paired with the searcher used to collect its doc set.
 struct DocSetSearchTask {
    searcher: Searcher,
    query: RangeQuery,
 }
 impl DocSetSearchTask {
    /// Collects every matching doc address and returns how many there are.
    /// Panics if the search fails.
    #[inline(never)]
    pub fn run(&self) -> usize {
        self.searcher
            .search(&self.query, &DocSetCollector)
            .unwrap()
            .len()
    }
 }
 /// A range query whose top 100 hits are ordered ascending by `field_name`.
 struct Top100AscSearchTask {
    searcher: Searcher,
    query: RangeQuery,
    field_name: String,
 }
 impl Top100AscSearchTask {
    /// Runs the search, loads every returned document, and returns the
    /// number of hits. Panics if the search or a document load fails.
    #[inline(never)]
    pub fn run(&self) -> usize {
        let by_field_asc =
            TopDocs::with_limit(100).order_by_fast_field::<u64>(&self.field_name, Order::Asc);
        let top_docs = self.searcher.search(&self.query, &by_field_asc).unwrap();
        // Document retrieval is part of the measured work.
        top_docs.iter().for_each(|(_sort_value, doc_address)| {
            let _doc: tantivy::TantivyDocument = self.searcher.doc(*doc_address).unwrap();
        });
        top_docs.len()
    }
 }
 /// A range query whose top 100 hits are ordered descending by `field_name`.
 struct Top100DescSearchTask {
    searcher: Searcher,
    query: RangeQuery,
    field_name: String,
 }
 impl Top100DescSearchTask {
    /// Runs the search, loads every returned document, and returns the
    /// number of hits. Panics if the search or a document load fails.
    #[inline(never)]
    pub fn run(&self) -> usize {
        let by_field_desc =
            TopDocs::with_limit(100).order_by_fast_field::<u64>(&self.field_name, Order::Desc);
        let top_docs = self.searcher.search(&self.query, &by_field_desc).unwrap();
        // Document retrieval is part of the measured work.
        top_docs.iter().for_each(|(_sort_value, doc_address)| {
            let _doc: tantivy::TantivyDocument = self.searcher.doc(*doc_address).unwrap();
        });
        top_docs.len()
    }
 }
--- a/benches/range_query.rs
+++ b/benches/range_query.rs
@@ -1,260 +0,0 @@
 use std::fmt::Display;
 use std::net::Ipv6Addr;
 use std::ops::RangeInclusive;
 use binggan::plugins::PeakMemAllocPlugin;
 use binggan::{black_box, BenchRunner, OutputValue, PeakMemAlloc, INSTRUMENTED_SYSTEM};
 use columnar::MonotonicallyMappableToU128;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tantivy::collector::{Count, TopDocs};
 use tantivy::query::QueryParser;
 use tantivy::schema::*;
 use tantivy::{doc, Index};
 /// Global allocator for this benchmark binary: binggan's instrumented wrapper
 /// around the system allocator. The same handle is passed to
 /// `PeakMemAllocPlugin` in `bench_range_query`.
 #[global_allocator]
 pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
 /// Benchmark entry point: runs the range-query benchmarks.
 fn main() {
    bench_range_query();
 }
 /// Builds the test index once and benchmarks range queries over it, first on
 /// the `u64` fields (`id`, `ids`) and then on the IP fields (`ip`, `ips`).
 fn bench_range_query() {
    let index = get_index_0_to_100();
    let mut runner = BenchRunner::new();
    runner.add_plugin(PeakMemAllocPlugin::new(GLOBAL));

    // u64 fields.
    runner.set_name("range_query on u64");
    let u64_fields = [
        ("id", "Single Valued Range Field"),
        ("ids", "Multi Valued Range Field"),
    ];
    let u64_ranges = vec![
        ("90_percent", get_90_percent()),
        ("10_percent", get_10_percent()),
        ("1_percent", get_1_percent()),
    ];
    test_range(&mut runner, &index, &u64_fields, u64_ranges);

    // IP address fields.
    runner.set_name("range_query on ip");
    let ip_fields = [
        ("ip", "Single Valued Range Field"),
        ("ips", "Multi Valued Range Field"),
    ];
    let ip_ranges = vec![
        ("90_percent", get_90_percent_ip()),
        ("10_percent", get_10_percent_ip()),
        ("1_percent", get_1_percent_ip()),
    ];
    test_range(&mut runner, &index, &ip_fields, ip_ranges);
 }
 /// Registers and runs one benchmark group per field.
 ///
 /// For every `(field, description)` pair in `field_name_and_descr` a group
 /// named after the description is created. Within the group, each range in
 /// `range_num_hits` is benchmarked on its own and intersected with each of
 /// the `id_name` terms (`veryfew`, `few`, `most`).
 fn test_range<T: Display>(
    runner: &mut BenchRunner,
    index: &Index,
    field_name_and_descr: &[(&str, &str)],
    range_num_hits: Vec<(&str, RangeInclusive<T>)>,
 ) {
    for (field, suffix) in field_name_and_descr {
        // (label, term) pairs; the empty pair stands for "range query only".
        let term_num_hits = vec![
            ("", ""),
            ("1_percent", "veryfew"),
            ("10_percent", "few"),
            ("90_percent", "most"),
        ];
        let mut group = runner.new_group();
        // Note: `suffix` holds the human-readable field description.
        group.set_name(suffix);
        // all intersect combinations
        for (range_name, range) in &range_num_hits {
            for (term_name, term) in &term_num_hits {
                let index = &index;
                // The benchmark name always uses the "id_range_hit" prefix, also for IP fields.
                let test_name = if term_name.is_empty() {
                    format!("id_range_hit_{}", range_name)
                } else {
                    format!(
                        "id_range_hit_{}_intersect_with_term_{}",
                        range_name, term_name
                    )
                };
                group.register(test_name, move |_| {
                    // The query suffix is built inside the closure, so it is part of the
                    // measured work.
                    let query = if term_name.is_empty() {
                        "".to_string()
                    } else {
                        format!("AND id_name:{}", term)
                    };
                    black_box(execute_query(field, range, &query, index));
                });
            }
        }
        group.run();
    }
 }
 fn get_index_0_to_100() -> Index {
    let mut rng = StdRng::from_seed([1u8; 32]);
    let num_vals = 100_000;
    let docs: Vec<_> = (0..num_vals)
        .map(|_i| {
            let id_name = if rng.gen_bool(0.01) {
                "veryfew".to_string() // 1%
            } else if rng.gen_bool(0.1) {
                "few".to_string() // 9%
            } else {
                "most".to_string() // 90%
            };
            Doc {
                id_name,
                id: rng.gen_range(0..100),
                // Multiply by 1000, so that we create most buckets in the compact space
                // The benches depend on this range to select n-percent of elements with the
                // methods below.
                ip: Ipv6Addr::from_u128(rng.gen_range(0..100) * 1000),
            }
        })
        .collect();
    create_index_from_docs(&docs)
 }
 /// Source record for one indexed benchmark document.
 #[derive(Clone, Debug)]
 pub struct Doc {
    /// Term written to the `id_name` and `id_name_fast` text fields.
    pub id_name: String,
    /// Numeric id, written to the u64, i64 and f64 id fields.
    pub id: u64,
    /// IP address written to the `ip` and `ips` fields.
    pub ip: Ipv6Addr,
 }
 /// Creates an in-RAM index holding one document per entry of `docs`.
 ///
 /// The schema has a single-valued and a multi-valued variant of the id as
 /// `u64`, `f64` and `i64`, two text fields for the `id_name` term, and a
 /// single- and multi-valued IP field. The multi-valued fields receive the same
 /// value twice per document. All documents are written with a single indexing
 /// thread and committed once at the end.
 ///
 /// Panics if creating the writer, adding a document, or committing fails.
 pub fn create_index_from_docs(docs: &[Doc]) -> Index {
    // Options used by the multi-valued numeric fields.
    let fast_indexed = || NumericOptions::default().set_fast().set_indexed();

    let mut builder = Schema::builder();
    let id_u64 = builder.add_u64_field("id", INDEXED | STORED | FAST);
    let ids_u64 = builder.add_u64_field("ids", fast_indexed());
    let id_f64 = builder.add_f64_field("id_f64", INDEXED | STORED | FAST);
    let ids_f64 = builder.add_f64_field("ids_f64", fast_indexed());
    let id_i64 = builder.add_i64_field("id_i64", INDEXED | STORED | FAST);
    let ids_i64 = builder.add_i64_field("ids_i64", fast_indexed());
    let name_text = builder.add_text_field("id_name", STRING | STORED);
    let name_text_fast = builder.add_text_field("id_name_fast", STRING | STORED | FAST);
    let ip_single = builder.add_ip_addr_field("ip", FAST);
    let ip_multi = builder.add_ip_addr_field("ips", FAST);

    let index = Index::create_in_ram(builder.build());
    {
        let mut writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
        for doc in docs {
            let document = doc!(
                ids_i64 => doc.id as i64,
                ids_i64 => doc.id as i64,
                ids_f64 => doc.id as f64,
                ids_f64 => doc.id as f64,
                ids_u64 => doc.id,
                ids_u64 => doc.id,
                id_u64 => doc.id,
                id_f64 => doc.id as f64,
                id_i64 => doc.id as i64,
                name_text => doc.id_name.to_string(),
                name_text_fast => doc.id_name.to_string(),
                ip_multi => doc.ip,
                ip_multi => doc.ip,
                ip_single => doc.ip,
            );
            writer.add_document(document).unwrap();
        }
        writer.commit().unwrap();
    }
    index
 }
 /// Id range `0..=90`, used as the "90_percent" selectivity in the benchmarks.
 fn get_90_percent() -> RangeInclusive<u64> {
    RangeInclusive::new(0, 90)
 }
 /// Id range `0..=10`, used as the "10_percent" selectivity in the benchmarks.
 fn get_10_percent() -> RangeInclusive<u64> {
    RangeInclusive::new(0, 10)
 }
 /// Single-value id range `10..=10`, used as the "1_percent" selectivity in the benchmarks.
 fn get_1_percent() -> RangeInclusive<u64> {
    RangeInclusive::new(10, 10)
 }
 /// IP range from numeric value `0` to `90 * 1000` (inclusive), used as the
 /// "90_percent" selectivity for the IP fields.
 fn get_90_percent_ip() -> RangeInclusive<Ipv6Addr> {
    Ipv6Addr::from_u128(0)..=Ipv6Addr::from_u128(90 * 1000)
 }
 /// IP range from numeric value `0` to `10 * 1000` (inclusive), used as the
 /// "10_percent" selectivity for the IP fields.
 fn get_10_percent_ip() -> RangeInclusive<Ipv6Addr> {
    Ipv6Addr::from_u128(0)..=Ipv6Addr::from_u128(10 * 1000)
 }
 /// Single-address IP range at numeric value `10 * 1000`, used as the
 /// "1_percent" selectivity for the IP fields.
 fn get_1_percent_ip() -> RangeInclusive<Ipv6Addr> {
    Ipv6Addr::from_u128(10 * 1000)..=Ipv6Addr::from_u128(10 * 1000)
 }
 /// Benchmark output value carrying the number of hits of a query.
 struct NumHits {
    /// Number of documents matched.
    count: usize,
 }
 impl OutputValue for NumHits {
    /// Title of the output column for this value.
    fn column_title() -> &'static str {
        "NumHits"
    }
    /// Renders the hit count as its decimal string.
    fn format(&self) -> Option<String> {
        let NumHits { count } = self;
        Some(count.to_string())
    }
 }
 /// Builds the query string `"<field>:[<start> TO <end>] <suffix>"` for the
 /// inclusive `id_range` and executes it against `index`.
 ///
 /// `suffix` is appended verbatim after the range clause; callers pass either an
 /// empty string or an additional `AND ...` clause.
 fn execute_query<T: Display>(
    field: &str,
    id_range: &RangeInclusive<T>,
    suffix: &str,
    index: &Index,
 ) -> NumHits {
    let (low, high) = (id_range.start(), id_range.end());
    let query_text = format!("{}:[{} TO {}] {}", field, low, high, suffix);
    execute_query_(&query_text, index)
 }
 /// Parses `query` with a `QueryParser` built for `index` (no default fields),
 /// runs it on a fresh searcher, and returns the total hit count.
 ///
 /// The search collects the top 10 documents by score together with the count;
 /// only the count is returned. Panics if parsing, opening the reader, or the
 /// search fails.
 fn execute_query_(query: &str, index: &Index) -> NumHits {
    let parsed_query = QueryParser::for_index(index, vec![])
        .parse_query(query)
        .unwrap();
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    let collectors = (TopDocs::with_limit(10).order_by_score(), Count);
    let (_top_docs, count) = searcher.search(&parsed_query, &collectors).unwrap();
    NumHits { count }
 }
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "tantivy-bitpacker"
-version = "0.9.0"
+version = "0.6.0"
-edition = "2024"
+edition = "2021"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = []
--- a/bitpacker/src/bitpacker.rs
+++ b/bitpacker/src/bitpacker.rs
@@ -48,7 +48,7 @@ impl BitPacker {
    pub fn flush<TWrite: io::Write + ?Sized>(&mut self, output: &mut TWrite) -> io::Result<()> {
        if self.mini_buffer_written > 0 {
-            let num_bytes = self.mini_buffer_written.div_ceil(8);
+            let num_bytes = (self.mini_buffer_written + 7) / 8;
            let bytes = self.mini_buffer.to_le_bytes();
            output.write_all(&bytes[..num_bytes])?;
            self.mini_buffer_written = 0;
@@ -65,7 +65,7 @@ impl BitPacker {
 #[derive(Clone, Debug, Default, Copy)]
 pub struct BitUnpacker {
-    num_bits: usize,
+    num_bits: u32,
    mask: u64,
 }
@@ -83,7 +83,7 @@ impl BitUnpacker {
            (1u64 << num_bits) - 1u64
        };
        BitUnpacker {
-            num_bits: usize::from(num_bits),
+            num_bits: u32::from(num_bits),
            mask,
        }
    }
@@ -94,14 +94,14 @@ impl BitUnpacker {
    #[inline]
    pub fn get(&self, idx: u32, data: &[u8]) -> u64 {
-        let addr_in_bits = idx as usize * self.num_bits;
+        let addr_in_bits = idx * self.num_bits;
-        let addr = addr_in_bits >> 3;
+        let addr = (addr_in_bits >> 3) as usize;
        if addr + 8 > data.len() {
            if self.num_bits == 0 {
                return 0;
            }
            let bit_shift = addr_in_bits & 7;
-            return self.get_slow_path(addr, bit_shift as u32, data);
+            return self.get_slow_path(addr, bit_shift, data);
        }
        let bit_shift = addr_in_bits & 7;
        let bytes: [u8; 8] = (&data[addr..addr + 8]).try_into().unwrap();
@@ -134,13 +134,12 @@ impl BitUnpacker {
            "Bitwidth must be <= 32 to use this method."
        );
-        let end_idx: u32 = start_idx + output.len() as u32;
+        let end_idx = start_idx + output.len() as u32;
-        // We use `usize` here to avoid overflow issues.
+        let end_bit_read = end_idx * self.num_bits;
-        let end_bit_read = (end_idx as usize) * self.num_bits;
+        let end_byte_read = (end_bit_read + 7) / 8;
        let end_byte_read = end_bit_read.div_ceil(8);
        assert!(
-            end_byte_read <= data.len(),
+            end_byte_read as usize <= data.len(),
            "Requested index is out of bounds."
        );
@@ -160,24 +159,24 @@ impl BitUnpacker {
        // We want the start of the fast track to start align with bytes.
        // A sufficient condition is to start with an idx that is a multiple of 8,
        // so highway start is the closest multiple of 8 that is >= start_idx.
-        let entrance_ramp_len: u32 = 8 - (start_idx % 8) % 8;
+        let entrance_ramp_len = 8 - (start_idx % 8) % 8;
        let highway_start: u32 = start_idx + entrance_ramp_len;
-        if highway_start + (BitPacker1x::BLOCK_LEN as u32) > end_idx {
+        if highway_start + BitPacker1x::BLOCK_LEN as u32 > end_idx {
            // We don't have enough values to have even a single block of highway.
            // Let's just supply the values the simple way.
            get_batch_ramp(start_idx, output);
            return;
        }
-        let num_blocks: usize = (end_idx - highway_start) as usize / BitPacker1x::BLOCK_LEN;
+        let num_blocks: u32 = (end_idx - highway_start) / BitPacker1x::BLOCK_LEN as u32;
        // Entrance ramp
        get_batch_ramp(start_idx, &mut output[..entrance_ramp_len as usize]);
        // Highway
-        let mut offset = (highway_start as usize * self.num_bits) / 8;
+        let mut offset = (highway_start * self.num_bits) as usize / 8;
        let mut output_cursor = (highway_start - start_idx) as usize;
        for _ in 0..num_blocks {
            offset += BitPacker1x.decompress(
@@ -189,7 +188,7 @@ impl BitUnpacker {
        }
        // Exit ramp
-        let highway_end: u32 = highway_start + (num_blocks * BitPacker1x::BLOCK_LEN) as u32;
+        let highway_end = highway_start + num_blocks * BitPacker1x::BLOCK_LEN as u32;
        get_batch_ramp(highway_end, &mut output[output_cursor..]);
    }
@@ -258,7 +257,7 @@ mod test {
            bitpacker.write(val, num_bits, &mut data).unwrap();
        }
        bitpacker.close(&mut data).unwrap();
-        assert_eq!(data.len(), ((num_bits as usize) * len).div_ceil(8));
+        assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8);
        let bitunpacker = BitUnpacker::new(num_bits);
        (bitunpacker, vals, data)
    }
@@ -304,7 +303,7 @@ mod test {
            bitpacker.write(val, num_bits, &mut buffer).unwrap();
        }
        bitpacker.flush(&mut buffer).unwrap();
-        assert_eq!(buffer.len(), (vals.len() * num_bits as usize).div_ceil(8));
+        assert_eq!(buffer.len(), (vals.len() * num_bits as usize + 7) / 8);
        let bitunpacker = BitUnpacker::new(num_bits);
        let max_val = if num_bits == 64 {
            u64::MAX
--- a/bitpacker/src/blocked_bitpacker.rs
+++ b/bitpacker/src/blocked_bitpacker.rs
@@ -1,6 +1,6 @@
 use super::bitpacker::BitPacker;
 use super::compute_num_bits;
-use crate::{BitUnpacker, minmax};
+use crate::{minmax, BitUnpacker};
 const BLOCK_SIZE: usize = 128;
@@ -34,7 +34,7 @@ struct BlockedBitpackerEntryMetaData {
 impl BlockedBitpackerEntryMetaData {
    fn new(offset: u64, num_bits: u8, base_value: u64) -> Self {
-        let encoded = offset | (u64::from(num_bits) << (64 - 8));
+        let encoded = offset | (num_bits as u64) << (64 - 8);
        Self {
            encoded,
            base_value,
@@ -140,10 +140,10 @@ impl BlockedBitpacker {
    pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
        // todo performance: we could decompress a whole block and cache it instead
        let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE;
-
+        let iter = (0..bitpacked_elems)
        (0..bitpacked_elems)
            .map(move |idx| self.get(idx))
-            .chain(self.buffer.iter().cloned())
+            .chain(self.buffer.iter().cloned());
        iter
    }
 }
--- a/bitpacker/src/filter_vec/avx2.rs
+++ b/bitpacker/src/filter_vec/avx2.rs
@@ -19,7 +19,7 @@ fn u32_to_i32(val: u32) -> i32 {
 #[inline]
 unsafe fn u32_to_i32_avx2(vals_u32x8s: DataType) -> DataType {
    const HIGHEST_BIT_MASK: DataType = from_u32x8([HIGHEST_BIT; NUM_LANES]);
-    unsafe { op_xor(vals_u32x8s, HIGHEST_BIT_MASK) }
+    op_xor(vals_u32x8s, HIGHEST_BIT_MASK)
 }
 pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
@@ -66,19 +66,17 @@ unsafe fn filter_vec_avx2_aux(
    ]);
    const SHIFT: __m256i = from_u32x8([NUM_LANES as u32; NUM_LANES]);
    for _ in 0..num_words {
-        unsafe {
+        let word = load_unaligned(input);
-            let word = load_unaligned(input);
+        let word = u32_to_i32_avx2(word);
-            let word = u32_to_i32_avx2(word);
+        let keeper_bitset = compute_filter_bitset(word, range_simd.clone());
-            let keeper_bitset = compute_filter_bitset(word, range_simd.clone());
+        let added_len = keeper_bitset.count_ones();
-            let added_len = keeper_bitset.count_ones();
+        let filtered_doc_ids = compact(ids, keeper_bitset);
-            let filtered_doc_ids = compact(ids, keeper_bitset);
+        store_unaligned(output_tail as *mut __m256i, filtered_doc_ids);
-            store_unaligned(output_tail as *mut __m256i, filtered_doc_ids);
+        output_tail = output_tail.offset(added_len as isize);
-            output_tail = output_tail.offset(added_len as isize);
+        ids = op_add(ids, SHIFT);
-            ids = op_add(ids, SHIFT);
+        input = input.offset(1);
            input = input.offset(1);
        }
    }
-    unsafe { output_tail.offset_from(output) as usize }
+    output_tail.offset_from(output) as usize
 }
 #[inline]
@@ -94,7 +92,8 @@ unsafe fn compute_filter_bitset(val: __m256i, range: std::ops::RangeInclusive<__
    let too_low = op_greater(*range.start(), val);
    let too_high = op_greater(val, *range.end());
    let inside = op_or(too_low, too_high);
-    255 - std::arch::x86_64::_mm256_movemask_ps(_mm256_castsi256_ps(inside)) as u8
+    255 - std::arch::x86_64::_mm256_movemask_ps(std::mem::transmute::<DataType, __m256>(inside))
        as u8
 }
 union U8x32 {
--- a/bitpacker/src/filter_vec/mod.rs
+++ b/bitpacker/src/filter_vec/mod.rs
@@ -35,8 +35,8 @@ const IMPLS: [FilterImplPerInstructionSet; 2] = [
 const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Scalar];
 impl FilterImplPerInstructionSet {
    #[allow(unused_variables)]
    #[inline]
    #[allow(unused_variables)] // on non-x86_64, code is unused.
    fn from(code: u8) -> FilterImplPerInstructionSet {
        #[cfg(target_arch = "x86_64")]
        if code == FilterImplPerInstructionSet::AVX2 as u8 {
--- a/bitpacker/src/lib.rs
+++ b/bitpacker/src/lib.rs
@@ -33,7 +33,11 @@ pub use crate::blocked_bitpacker::BlockedBitpacker;
 /// number of bits.
 pub fn compute_num_bits(n: u64) -> u8 {
    let amplitude = (64u32 - n.leading_zeros()) as u8;
-    if amplitude <= 64 - 8 { amplitude } else { 64 }
+    if amplitude <= 64 - 8 {
        amplitude
    } else {
        64
    }
 }
 /// Computes the (min, max) of an iterator of `PartialOrd` values.
--- a/cliff.toml
+++ b/cliff.toml
@@ -16,14 +16,14 @@ body = """
 {%- if version %} in {{ version }}{%- endif -%}
 {% for commit in commits %}
-  {% if commit.remote.pr_title -%}
+  {% if commit.github.pr_title -%}
-    {%- set commit_message = commit.remote.pr_title -%}
+    {%- set commit_message = commit.github.pr_title -%}
  {%- else -%}
    {%- set commit_message = commit.message -%}
  {%- endif -%}
  - {{ commit_message | split(pat="\n") | first | trim }}\
-    {% if commit.remote.pr_number %} \
+    {% if commit.github.pr_number %} \
-      [#{{ commit.remote.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.remote.pr_number }}){% if commit.remote.username %}(@{{ commit.remote.username }}){%- endif -%} \
+      [#{{ commit.github.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.github.pr_number }}){% if commit.github.username %}(@{{ commit.github.username }}){%- endif -%} \
    {%- endif %}
 {%- endfor -%}
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "tantivy-columnar"
-version = "0.6.0"
+version = "0.3.0"
-edition = "2024"
+edition = "2021"
 license = "MIT"
 homepage = "https://github.com/quickwit-oss/tantivy"
 repository = "https://github.com/quickwit-oss/tantivy"
@@ -9,21 +9,21 @@ description = "column oriented storage for tantivy"
 categories = ["database-implementations", "data-structures", "compression"]
 [dependencies]
-itertools = "0.14.0"
+itertools = "0.13.0"
 fastdivide = "0.4.0"
-stacker = { version= "0.6", path = "../stacker", package="tantivy-stacker"}
+stacker = { version= "0.3", path = "../stacker", package="tantivy-stacker"}
-sstable = { version= "0.6", path = "../sstable", package = "tantivy-sstable" }
+sstable = { version= "0.3", path = "../sstable", package = "tantivy-sstable" }
-common = { version= "0.10", path = "../common", package = "tantivy-common" }
+common = { version= "0.7", path = "../common", package = "tantivy-common" }
-tantivy-bitpacker = { version= "0.9", path = "../bitpacker/" }
+tantivy-bitpacker = { version= "0.6", path = "../bitpacker/" }
 serde = "1.0.152"
-downcast-rs = "2.0.1"
+downcast-rs = "1.2.0"
 [dev-dependencies]
 proptest = "1"
 more-asserts = "0.3.1"
 rand = "0.8"
-binggan = "0.14.0"
+binggan = "0.12.0"
 [[bench]]
 name = "bench_merge"
@@ -33,29 +33,6 @@ harness = false
 name = "bench_access"
 harness = false
 [[bench]]
 name = "bench_first_vals"
 harness = false
 [[bench]]
 name = "bench_values_u64"
 harness = false
 [[bench]]
 name = "bench_values_u128"
 harness = false
 [[bench]]
 name = "bench_create_column_values"
 harness = false
 [[bench]]
 name = "bench_column_values_get"
 harness = false
 [[bench]]
 name = "bench_optional_index"
 harness = false
 [features]
-zstd-compression = ["sstable/zstd-compression"]
+unstable = []
--- a/columnar/README.md
+++ b/columnar/README.md
@@ -73,7 +73,7 @@ The crate introduces the following concepts.
 `Columnar` is an equivalent of a dataframe.
 It maps `column_key` to `Column`.
-A `Column<T>` associates a `RowId` (u32) to any
+A `Column<T>` associates a `RowId` (u32) to any
 number of values.
 This is made possible by wrapping a `ColumnIndex` and a `ColumnValue` object.
--- a/columnar/benches/bench_access.rs
+++ b/columnar/benches/bench_access.rs
@@ -1,4 +1,4 @@
-use binggan::{InputGroup, black_box};
+use binggan::{black_box, InputGroup};
 use common::*;
 use tantivy_columnar::Column;
@@ -19,7 +19,7 @@ fn main() {
    let mut add_card = |card1: Card| {
        inputs.push((
-            card1.to_string(),
+            format!("{card1}"),
            generate_columnar_and_open(card1, NUM_DOCS),
        ));
    };
@@ -42,6 +42,7 @@ fn bench_group(mut runner: InputGroup<Column>) {
            }
        }
        black_box(sum);
        None
    });
    runner.register("access_first_vals", |column| {
        let mut sum = 0;
@@ -50,7 +51,6 @@ fn bench_group(mut runner: InputGroup<Column>) {
        let mut buffer = vec![None; BLOCK_SIZE];
        for i in (0..NUM_DOCS).step_by(BLOCK_SIZE) {
            // fill docs
            #[allow(clippy::needless_range_loop)]
            for idx in 0..BLOCK_SIZE {
                docs[idx] = idx as u32 + i;
            }
@@ -63,6 +63,7 @@ fn bench_group(mut runner: InputGroup<Column>) {
        }
        black_box(sum);
        None
    });
    runner.run();
 }
--- a/columnar/benches/bench_column_values_get.rs
+++ b/columnar/benches/bench_column_values_get.rs
@@ -1,61 +0,0 @@
 use std::sync::Arc;
 use binggan::{InputGroup, black_box};
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tantivy_columnar::ColumnValues;
 use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
 /// Generates the benchmark input: the values `100..55_000`, each increased by
 /// a random `u8` drawn from an RNG with a fixed seed, followed by one large
 /// trailing value and a few out-of-trend values inserted at fixed positions.
 fn get_data() -> Vec<u64> {
    let mut rng = StdRng::seed_from_u64(2u64);
    let mut values: Vec<u64> = (100..55_000_u64)
        .map(|base| base + rng.r#gen::<u8>() as u64)
        .collect();
    values.push(99_000);
    // (position, value) pairs, inserted in this order.
    let outliers = [
        (1000, 2000),
        (2000, 100),
        (3000, 4100),
        (4000, 100),
        (5000, 800),
    ];
    for (position, value) in outliers {
        values.insert(position, value);
    }
    values
 }
 /// Positions read by the benchmark: `0` up to (excluding) `20_000`.
 #[inline(never)]
 fn value_iter() -> impl Iterator<Item = u64> {
    const NUM_POSITIONS: u64 = 20_000;
    0..NUM_POSITIONS
 }
 type Col = Arc<dyn ColumnValues<u64>>;
 /// Benchmarks `get_val` on the same data serialized with each codec
 /// (bitpacked, linear, blockwise linear).
 fn main() {
    let data = get_data();
    let codecs = [
        ("bitpacked", CodecType::Bitpacked),
        ("linear", CodecType::Linear),
        ("blockwise_linear", CodecType::BlockwiseLinear),
    ];
    let inputs: Vec<(String, Col)> = codecs
        .into_iter()
        .map(|(name, codec)| {
            let column: Col =
                serialize_and_load_u64_based_column_values(&data.as_slice(), &[codec]);
            (name.to_string(), column)
        })
        .collect();
    let mut group: InputGroup<Col> = InputGroup::new_with_inputs(inputs);
    group.register("fastfield_get", |col: &Col| {
        // Wrapping sum over all read values keeps the reads observable.
        let sum = value_iter().fold(0u64, |acc, pos| acc.wrapping_add(col.get_val(pos as u32)));
        black_box(sum);
    });
    group.run();
 }
--- a/columnar/benches/bench_create_column_values.rs
+++ b/columnar/benches/bench_create_column_values.rs
@@ -1,44 +0,0 @@
 use binggan::{InputGroup, black_box};
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tantivy_columnar::column_values::{CodecType, serialize_u64_based_column_values};
 /// Generates the benchmark input: the values `100..55_000`, each increased by
 /// a random `u8` drawn from an RNG with a fixed seed, followed by one large
 /// trailing value and a few out-of-trend values inserted at fixed positions.
 fn get_data() -> Vec<u64> {
    let mut rng = StdRng::seed_from_u64(2u64);
    let mut values: Vec<u64> = (100..55_000_u64)
        .map(|base| base + rng.r#gen::<u8>() as u64)
        .collect();
    values.push(99_000);
    // (position, value) pairs, inserted in this order.
    for (position, value) in [
        (1000, 2000),
        (2000, 100),
        (3000, 4100),
        (4000, 100),
        (5000, 800),
    ] {
        values.insert(position, value);
    }
    values
 }
 /// Benchmarks serializing the same data with each codec
 /// (bitpacked, linear, blockwise linear).
 fn main() {
    let data = get_data();
    let inputs: Vec<(String, (CodecType, Vec<u64>))> = [
        ("bitpacked codec", CodecType::Bitpacked),
        ("linear codec", CodecType::Linear),
        ("blockwise linear codec", CodecType::BlockwiseLinear),
    ]
    .into_iter()
    .map(|(name, codec)| (name.to_string(), (codec, data.clone())))
    .collect();
    let mut group: InputGroup<(CodecType, Vec<u64>)> = InputGroup::new_with_inputs(inputs);
    group.register("serialize column_values", |input| {
        let (codec, values) = input;
        let mut serialized = Vec::new();
        serialize_u64_based_column_values(&values.as_slice(), &[*codec], &mut serialized).unwrap();
        black_box(serialized.len());
    });
    group.run();
 }
--- a/columnar/benches/bench_first_vals.rs
+++ b/columnar/benches/bench_first_vals.rs
@@ -1,9 +1,12 @@
 #![feature(test)]
 extern crate test;
 use std::sync::Arc;
 use binggan::{InputGroup, black_box};
 use rand::prelude::*;
-use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
+use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
 use tantivy_columnar::*;
 use test::{black_box, Bencher};
 struct Columns {
    pub optional: Column,
@@ -65,38 +68,88 @@ pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<dyn Colu
    serialize_and_load_u64_based_column_values(&column, &[codec_type])
 }
-fn main() {
+fn run_bench_on_column_full_scan(b: &mut Bencher, column: Column) {
-    let Columns {
+    let num_iter = black_box(NUM_VALUES);
-        optional,
+    b.iter(|| {
        full,
        multi,
    } = get_test_columns();
    let inputs = vec![
        ("full".to_string(), full),
        ("optional".to_string(), optional),
        ("multi".to_string(), multi),
    ];
    let mut group = InputGroup::new_with_inputs(inputs);
    group.register("first_full_scan", |column| {
        let mut sum = 0u64;
-        for i in 0..NUM_VALUES as u32 {
+        for i in 0..num_iter as u32 {
            let val = column.first(i);
            sum += val.unwrap_or(0);
        }
-        black_box(sum);
+        sum
    });
-
+}
-    group.register("first_block_single_calls", |column| {
+fn run_bench_on_column_block_fetch(b: &mut Bencher, column: Column) {
-        let mut block: Vec<Option<u64>> = vec![None; 64];
+    let mut block: Vec<Option<u64>> = vec![None; 64];
-        let fetch_docids = (0..64).collect::<Vec<_>>();
+    let fetch_docids = (0..64).collect::<Vec<_>>();
    b.iter(move || {
        column.first_vals(&fetch_docids, &mut block);
        block[0]
    });
 }
 fn run_bench_on_column_block_single_calls(b: &mut Bencher, column: Column) {
    let mut block: Vec<Option<u64>> = vec![None; 64];
    let fetch_docids = (0..64).collect::<Vec<_>>();
    b.iter(move || {
        for i in 0..fetch_docids.len() {
            block[i] = column.first(fetch_docids[i]);
        }
-        black_box(block[0]);
+        block[0]
    });
-
+}
-    group.run();
+
 /// Column first method
 #[bench]
 fn bench_get_first_on_full_column_full_scan(b: &mut Bencher) {
    let column = get_test_columns().full;
    run_bench_on_column_full_scan(b, column);
 }
 #[bench]
 fn bench_get_first_on_optional_column_full_scan(b: &mut Bencher) {
    let column = get_test_columns().optional;
    run_bench_on_column_full_scan(b, column);
 }
 #[bench]
 fn bench_get_first_on_multi_column_full_scan(b: &mut Bencher) {
    let column = get_test_columns().multi;
    run_bench_on_column_full_scan(b, column);
 }
 /// Block fetch column accessor
 #[bench]
 fn bench_get_block_first_on_optional_column(b: &mut Bencher) {
    let column = get_test_columns().optional;
    run_bench_on_column_block_fetch(b, column);
 }
 #[bench]
 fn bench_get_block_first_on_multi_column(b: &mut Bencher) {
    let column = get_test_columns().multi;
    run_bench_on_column_block_fetch(b, column);
 }
 #[bench]
 fn bench_get_block_first_on_full_column(b: &mut Bencher) {
    let column = get_test_columns().full;
    run_bench_on_column_block_fetch(b, column);
 }
 #[bench]
 fn bench_get_block_first_on_optional_column_single_calls(b: &mut Bencher) {
    let column = get_test_columns().optional;
    run_bench_on_column_block_single_calls(b, column);
 }
 #[bench]
 fn bench_get_block_first_on_multi_column_single_calls(b: &mut Bencher) {
    let column = get_test_columns().multi;
    run_bench_on_column_block_single_calls(b, column);
 }
 #[bench]
 fn bench_get_block_first_on_full_column_single_calls(b: &mut Bencher) {
    let column = get_test_columns().full;
    run_bench_on_column_block_single_calls(b, column);
 }
--- a/columnar/benches/bench_merge.rs
+++ b/columnar/benches/bench_merge.rs
@@ -1,7 +1,7 @@
 pub mod common;
-use binggan::BenchRunner;
+use binggan::{black_box, BenchRunner};
-use common::{Card, generate_columnar_with_name};
+use common::{generate_columnar_with_name, Card};
 use tantivy_columnar::*;
 const NUM_DOCS: u32 = 100_000;
@@ -29,7 +29,7 @@ fn main() {
    add_combo(Card::Multi, Card::Dense);
    add_combo(Card::Multi, Card::Sparse);
-    let mut runner: BenchRunner = BenchRunner::new();
+    let runner: BenchRunner = BenchRunner::new();
    let mut group = runner.new_group();
    for (input_name, columnar_readers) in inputs.iter() {
        group.register_with_input(
--- a/columnar/benches/bench_optional_index.rs
+++ b/columnar/benches/bench_optional_index.rs
@@ -1,106 +0,0 @@
 use binggan::{InputGroup, black_box};
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tantivy_columnar::column_index::{OptionalIndex, Set};
 const TOTAL_NUM_VALUES: u32 = 1_000_000;
 /// Builds an `OptionalIndex` over `TOTAL_NUM_VALUES` rows in which each row is
 /// kept with probability `fill_ratio`, using an RNG with a fixed seed.
 fn gen_optional_index(fill_ratio: f64) -> OptionalIndex {
    let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
    // One random draw per row, in row order; rows whose draw is `true` are kept.
    let non_null_rows: Vec<u32> = (0..TOTAL_NUM_VALUES)
        .filter(|_| rng.gen_bool(fill_ratio))
        .collect();
    OptionalIndex::for_test(TOTAL_NUM_VALUES, &non_null_rows)
 }
 /// Yields strictly increasing positions below `end`.
 ///
 /// Starting from `start`, every step advances by a random amount drawn from
 /// `avg_step_size - avg_deviation ..= avg_step_size + avg_deviation` (fixed
 /// seed). The first yielded position is therefore already one step past
 /// `start`. Iteration ends once the position reaches `end`.
 fn random_range_iterator(
    start: u32,
    end: u32,
    avg_step_size: u32,
    avg_deviation: u32,
 ) -> impl Iterator<Item = u32> {
    let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
    let mut position = start;
    std::iter::from_fn(move || {
        let step = rng.gen_range(avg_step_size - avg_deviation..=avg_step_size + avg_deviation);
        position += step;
        (position < end).then_some(position)
    })
 }
 /// Yields random positions below `num_values` with an average step of
 /// `100 / percent` (truncated), so that about `percent` percent of the
 /// positions are visited. The step deviation is `step_size - 1`.
 fn n_percent_step_iterator(percent: f32, num_values: u32) -> impl Iterator<Item = u32> {
    let step_size = (1f32 / (percent / 100.0)) as u32;
    random_range_iterator(0, num_values, step_size, step_size - 1)
 }
 /// Probes `codec` at positions spaced exactly `avg_step_size` apart (no
 /// deviation) across `TOTAL_NUM_VALUES` rows.
 fn walk_over_data(codec: &OptionalIndex, avg_step_size: u32) -> Option<u32> {
    let positions = random_range_iterator(0, TOTAL_NUM_VALUES, avg_step_size, 0);
    walk_over_data_from_positions(codec, positions)
 }
 /// Calls `rank_if_exists` for every position and returns the first `Some`
 /// result, if any. The lookup runs for every position, even after a result
 /// has been found.
 fn walk_over_data_from_positions(
    codec: &OptionalIndex,
    positions: impl Iterator<Item = u32>,
 ) -> Option<u32> {
    positions.fold(None, |first_rank: Option<u32>, row_id| {
        first_rank.or(codec.rank_if_exists(row_id))
    })
 }
 /// Benchmarks `OptionalIndex` translation in both directions for several fill ratios.
 fn main() {
    /// Translates a sample of codec ranks back to original row ids.
    /// A `percent_hit` of exactly 100 selects every rank.
    fn bench_translate_codec_to_orig_util(codec: &OptionalIndex, percent_hit: f32) {
        let num_non_nulls = codec.num_non_nulls();
        let ranks: Vec<u32> = if percent_hit == 100.0f32 {
            (0..num_non_nulls).collect()
        } else {
            n_percent_step_iterator(percent_hit, num_non_nulls).collect()
        };
        let mut output = vec![0u32; ranks.len()];
        output.copy_from_slice(&ranks[..]);
        codec.select_batch(&mut output);
        black_box(output);
    }

    // Build separate inputs for each fill ratio.
    let fill_ratios = [
        ("fill=1%", 0.01),
        ("fill=5%", 0.05),
        ("fill=10%", 0.10),
        ("fill=50%", 0.50),
        ("fill=90%", 0.90),
    ];
    let inputs: Vec<(String, OptionalIndex)> = fill_ratios
        .into_iter()
        .map(|(label, ratio)| (label.to_string(), gen_optional_index(ratio)))
        .collect();
    let mut group: InputGroup<OptionalIndex> = InputGroup::new_with_inputs(inputs);

    // Translate orig->codec (rank_if_exists) with sampling
    let sampled_walks: [(&str, u32); 2] = [
        ("orig_to_codec_10pct_hit", 100),
        ("orig_to_codec_1pct_hit", 1000),
    ];
    for (bench_name, step_size) in sampled_walks {
        group.register(bench_name, move |codec: &OptionalIndex| {
            black_box(walk_over_data(codec, step_size));
        });
    }
    group.register("orig_to_codec_full_scan", |codec: &OptionalIndex| {
        black_box(walk_over_data_from_positions(codec, 0..TOTAL_NUM_VALUES));
    });

    // Translate codec->orig (select/select_batch) on sampled ranks
    let select_benches: [(&str, f32); 3] = [
        ("codec_to_orig_0.005pct_hit", 0.005),
        ("codec_to_orig_10pct_hit", 10.0),
        ("codec_to_orig_full_scan", 100.0),
    ];
    for (bench_name, percent_hit) in select_benches {
        group.register(bench_name, move |codec: &OptionalIndex| {
            bench_translate_codec_to_orig_util(codec, percent_hit);
        });
    }
    group.run();
 }
--- a/columnar/benches/bench_values_u128.rs
+++ b/columnar/benches/bench_values_u128.rs
@@ -1,12 +1,15 @@
 #![feature(test)]
 use std::ops::RangeInclusive;
 use std::sync::Arc;
 use binggan::{InputGroup, black_box};
 use common::OwnedBytes;
 use rand::rngs::StdRng;
 use rand::seq::SliceRandom;
-use rand::{Rng, SeedableRng, random};
+use rand::{random, Rng, SeedableRng};
 use tantivy_columnar::ColumnValues;
 use test::Bencher;
 extern crate test;
 // TODO does this make sense for IPv6 ?
 fn generate_random() -> Vec<u64> {
@@ -44,77 +47,78 @@ fn get_data_50percent_item() -> Vec<u128> {
    }
    data.push(SINGLE_ITEM);
    data.shuffle(&mut rng);
-    data.iter().map(|el| *el as u128).collect::<Vec<_>>()
+    let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
    data
 }
-fn main() {
+#[bench]
 fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) {
    let data = get_data_50percent_item();
-    let column_range = get_u128_column_from_data(&data);
+    let column = get_u128_column_from_data(&data);
    let column_random = get_u128_column_random();
-    struct Inputs {
+    b.iter(|| {
        data: Vec<u128>,
        column_range: Arc<dyn ColumnValues<u128>>,
        column_random: Arc<dyn ColumnValues<u128>>,
    }
    let inputs = Inputs {
        data,
        column_range,
        column_random,
    };
    let mut group: InputGroup<Inputs> =
        InputGroup::new_with_inputs(vec![("u128 benches".to_string(), inputs)]);
    group.register(
        "intfastfield_getrange_u128_50percent_hit",
        |inp: &Inputs| {
            let mut positions = Vec::new();
            inp.column_range.get_row_ids_for_value_range(
                *FIFTY_PERCENT_RANGE.start() as u128..=*FIFTY_PERCENT_RANGE.end() as u128,
                0..inp.data.len() as u32,
                &mut positions,
            );
            black_box(positions.len());
        },
    );
    group.register("intfastfield_getrange_u128_single_hit", |inp: &Inputs| {
        let mut positions = Vec::new();
-        inp.column_range.get_row_ids_for_value_range(
+        column.get_row_ids_for_value_range(
            *FIFTY_PERCENT_RANGE.start() as u128..=*FIFTY_PERCENT_RANGE.end() as u128,
            0..data.len() as u32,
            &mut positions,
        );
        positions
    });
 }
 #[bench]
 fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) {
    let data = get_data_50percent_item();
    let column = get_u128_column_from_data(&data);
    b.iter(|| {
        let mut positions = Vec::new();
        column.get_row_ids_for_value_range(
            *SINGLE_ITEM_RANGE.start() as u128..=*SINGLE_ITEM_RANGE.end() as u128,
-            0..inp.data.len() as u32,
+            0..data.len() as u32,
            &mut positions,
        );
-        black_box(positions.len());
+        positions
    });
 }
-    group.register("intfastfield_getrange_u128_hit_all", |inp: &Inputs| {
+#[bench]
 fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) {
    let data = get_data_50percent_item();
    let column = get_u128_column_from_data(&data);
    b.iter(|| {
        let mut positions = Vec::new();
-        inp.column_range.get_row_ids_for_value_range(
+        column.get_row_ids_for_value_range(0..=u128::MAX, 0..data.len() as u32, &mut positions);
-            0..=u128::MAX,
+        positions
            0..inp.data.len() as u32,
            &mut positions,
        );
        black_box(positions.len());
    });
 }
 // U128 RANGE END
-    group.register("intfastfield_scan_all_fflookup_u128", |inp: &Inputs| {
+#[bench]
 fn bench_intfastfield_scan_all_fflookup_u128(b: &mut Bencher) {
    let column = get_u128_column_random();
    b.iter(|| {
        let mut a = 0u128;
-        for i in 0u64..inp.column_random.num_vals() as u64 {
+        for i in 0u64..column.num_vals() as u64 {
-            a += inp.column_random.get_val(i as u32);
+            a += column.get_val(i as u32);
        }
-        black_box(a);
+        a
    });
 }
-    group.register("intfastfield_jumpy_stride5_u128", |inp: &Inputs| {
+#[bench]
-        let n = inp.column_random.num_vals();
+fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) {
    let column = get_u128_column_random();
    b.iter(|| {
        let n = column.num_vals();
        let mut a = 0u128;
        for i in (0..n / 5).map(|val| val * 5) {
-            a += inp.column_random.get_val(i);
+            a += column.get_val(i);
        }
-        black_box(a);
+        a
    });
    group.run();
 }
--- a/columnar/benches/bench_values_u64.rs
+++ b/columnar/benches/bench_values_u64.rs
@@ -1,10 +1,13 @@
 #![feature(test)]
 extern crate test;
 use std::ops::RangeInclusive;
 use std::sync::Arc;
 use binggan::{InputGroup, black_box};
 use rand::prelude::*;
-use tantivy_columnar::column_values::{CodecType, serialize_and_load_u64_based_column_values};
+use tantivy_columnar::column_values::{serialize_and_load_u64_based_column_values, CodecType};
 use tantivy_columnar::*;
 use test::Bencher;
 // Warning: this generates the same permutation at each call
 fn generate_permutation() -> Vec<u64> {
@@ -24,11 +27,37 @@ pub fn serialize_and_load(column: &[u64], codec_type: CodecType) -> Arc<dyn Colu
    serialize_and_load_u64_based_column_values(&column, &[codec_type])
 }
 /// Chases the permutation through a plain `Vec`: each value read is used as the next index,
 /// for as many hops as the permutation has entries.
 #[bench]
 fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) {
    let permutation = generate_permutation();
    let num_hops = permutation.len();
    b.iter(|| (0..num_hops).fold(0u64, |pos, _| permutation[pos as usize]));
 }
 /// Same pointer-chasing walk as the `Vec` variant, but every hop goes through a bitpacked
 /// column built from the permutation.
 #[bench]
 fn bench_intfastfield_jumpy_fflookup_bitpacked(b: &mut Bencher) {
    let permutation = generate_permutation();
    let num_hops = permutation.len();
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
    b.iter(|| (0..num_hops).fold(0u64, |pos, _| column.get_val(pos as u32)));
 }
 const FIFTY_PERCENT_RANGE: RangeInclusive<u64> = 1..=50;
 const SINGLE_ITEM: u64 = 90;
 const SINGLE_ITEM_RANGE: RangeInclusive<u64> = 90..=90;
 const ONE_PERCENT_ITEM_RANGE: RangeInclusive<u64> = 49..=49;
 fn get_data_50percent_item() -> Vec<u128> {
    let mut rng = StdRng::from_seed([1u8; 32]);
@@ -40,122 +69,135 @@ fn get_data_50percent_item() -> Vec<u128> {
    data.push(SINGLE_ITEM);
    data.shuffle(&mut rng);
-    data.iter().map(|el| *el as u128).collect::<Vec<_>>()
+    let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
    data
 }
-type VecCol = (Vec<u64>, Arc<dyn ColumnValues<u64>>);
+// U64 RANGE START
 /// Collects the row ids whose value lies in `FIFTY_PERCENT_RANGE`, scanning every row of a
 /// bitpacked column built from the `get_data_50percent_item` data set.
 #[bench]
 fn bench_intfastfield_getrange_u64_50percent_hit(b: &mut Bencher) {
    // The data set is produced as u128 values; narrow it to u64 for this column.
    let values: Vec<u64> = get_data_50percent_item()
        .into_iter()
        .map(|item| item as u64)
        .collect();
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&values, CodecType::Bitpacked);
    let num_rows = values.len() as u32;
    b.iter(|| {
        let mut row_ids = Vec::new();
        column.get_row_ids_for_value_range(FIFTY_PERCENT_RANGE, 0..num_rows, &mut row_ids);
        row_ids
    });
 }
-fn bench_access() {
+#[bench]
 fn bench_intfastfield_getrange_u64_1percent_hit(b: &mut Bencher) {
    // Collects the row ids whose value lies in `ONE_PERCENT_ITEM_RANGE`, scanning every row
    // of a bitpacked column. The data set is produced as u128 values and narrowed to u64.
    let values: Vec<u64> = get_data_50percent_item()
        .into_iter()
        .map(|item| item as u64)
        .collect();
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&values, CodecType::Bitpacked);
    let num_rows = values.len() as u32;
    b.iter(|| {
        let mut row_ids = Vec::new();
        column.get_row_ids_for_value_range(ONE_PERCENT_ITEM_RANGE, 0..num_rows, &mut row_ids);
        row_ids
    });
 }
 /// Collects the row ids whose value lies in `SINGLE_ITEM_RANGE`, scanning every row of a
 /// bitpacked column built from the `get_data_50percent_item` data set.
 #[bench]
 fn bench_intfastfield_getrange_u64_single_hit(b: &mut Bencher) {
    // The data set is produced as u128 values; narrow it to u64 for this column.
    let values: Vec<u64> = get_data_50percent_item()
        .into_iter()
        .map(|item| item as u64)
        .collect();
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&values, CodecType::Bitpacked);
    let num_rows = values.len() as u32;
    b.iter(|| {
        let mut row_ids = Vec::new();
        column.get_row_ids_for_value_range(SINGLE_ITEM_RANGE, 0..num_rows, &mut row_ids);
        row_ids
    });
 }
 /// Collects row ids for the full value range `0..=u64::MAX`, scanning every row of a
 /// bitpacked column built from the `get_data_50percent_item` data set.
 #[bench]
 fn bench_intfastfield_getrange_u64_hit_all(b: &mut Bencher) {
    // The data set is produced as u128 values; narrow it to u64 for this column.
    let values: Vec<u64> = get_data_50percent_item()
        .into_iter()
        .map(|item| item as u64)
        .collect();
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&values, CodecType::Bitpacked);
    let num_rows = values.len() as u32;
    b.iter(|| {
        let mut row_ids = Vec::new();
        column.get_row_ids_for_value_range(0..=u64::MAX, 0..num_rows, &mut row_ids);
        row_ids
    });
 }
 // U64 RANGE END
 #[bench]
 fn bench_intfastfield_stride7_vec(b: &mut Bencher) {
    let permutation = generate_permutation();
-    let column_perm: Arc<dyn ColumnValues<u64>> =
+    let n = permutation.len();
-        serialize_and_load(&permutation, CodecType::Bitpacked);
+    b.iter(|| {
    let permutation_gcd = generate_permutation_gcd();
    let column_perm_gcd: Arc<dyn ColumnValues<u64>> =
        serialize_and_load(&permutation_gcd, CodecType::Bitpacked);
    let mut group: InputGroup<VecCol> = InputGroup::new_with_inputs(vec![
        (
            "access".to_string(),
            (permutation.clone(), column_perm.clone()),
        ),
        (
            "access_gcd".to_string(),
            (permutation_gcd.clone(), column_perm_gcd.clone()),
        ),
    ]);
    group.register("stride7_vec", |inp: &VecCol| {
        let n = inp.0.len();
        let mut a = 0u64;
        for i in (0..n / 7).map(|val| val * 7) {
-            a += inp.0[i];
+            a += permutation[i as usize];
        }
-        black_box(a);
+        a
    });
 }
-    group.register("fullscan_vec", |inp: &VecCol| {
+#[bench]
-        let mut a = 0u64;
+fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) {
-        for i in 0..inp.0.len() {
+    let permutation = generate_permutation();
-            a += inp.0[i];
+    let n = permutation.len();
-        }
+    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
-        black_box(a);
+    b.iter(|| {
-    });
+        let mut a = 0;
    group.register("stride7_column_values", |inp: &VecCol| {
        let n = inp.1.num_vals() as usize;
        let mut a = 0u64;
        for i in (0..n / 7).map(|val| val * 7) {
-            a += inp.1.get_val(i as u32);
+            a += column.get_val(i as u32);
        }
-        black_box(a);
+        a
    });
 }
-    group.register("fullscan_column_values", |inp: &VecCol| {
+#[bench]
 fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) {
    // Sums every value of a bitpacked column built from the permutation, row by row.
    let permutation = generate_permutation();
    let num_rows = permutation.len() as u32;
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
    // Take the `&dyn ColumnValues` once, outside the measured closure.
    let column_ref = column.as_ref();
    b.iter(|| (0u32..num_rows).fold(0u64, |sum, row| sum + column_ref.get_val(row)));
 }
 #[bench]
 fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) {
    let permutation = generate_permutation_gcd();
    let n = permutation.len();
    let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&permutation, CodecType::Bitpacked);
    b.iter(|| {
        let mut a = 0u64;
        let n = inp.1.num_vals() as usize;
        for i in 0..n {
-            a += inp.1.get_val(i as u32);
+            a += column.get_val(i as u32);
        }
-        black_box(a);
+        a
    });
    group.run();
 }
-fn bench_range() {
+#[bench]
-    let data_50 = get_data_50percent_item();
+fn bench_intfastfield_scan_all_vec(b: &mut Bencher) {
-    let data_u64 = data_50.iter().map(|el| *el as u64).collect::<Vec<_>>();
+    let permutation = generate_permutation();
-    let column_data: Arc<dyn ColumnValues<u64>> =
+    b.iter(|| {
-        serialize_and_load(&data_u64, CodecType::Bitpacked);
+        let mut a = 0u64;
-
+        for i in 0..permutation.len() {
-    let mut group: InputGroup<Arc<dyn ColumnValues<u64>>> =
+            a += permutation[i as usize] as u64;
-        InputGroup::new_with_inputs(vec![("dist_50pct_item".to_string(), column_data.clone())]);
+        }
-
+        a
-    group.register(
+    });
        "fastfield_getrange_u64_50percent_hit",
        |col: &Arc<dyn ColumnValues<u64>>| {
            let mut positions = Vec::new();
            col.get_row_ids_for_value_range(FIFTY_PERCENT_RANGE, 0..col.num_vals(), &mut positions);
            black_box(positions.len());
        },
    );
    group.register(
        "fastfield_getrange_u64_1percent_hit",
        |col: &Arc<dyn ColumnValues<u64>>| {
            let mut positions = Vec::new();
            col.get_row_ids_for_value_range(
                ONE_PERCENT_ITEM_RANGE,
                0..col.num_vals(),
                &mut positions,
            );
            black_box(positions.len());
        },
    );
    group.register(
        "fastfield_getrange_u64_single_hit",
        |col: &Arc<dyn ColumnValues<u64>>| {
            let mut positions = Vec::new();
            col.get_row_ids_for_value_range(SINGLE_ITEM_RANGE, 0..col.num_vals(), &mut positions);
            black_box(positions.len());
        },
    );
    group.register(
        "fastfield_getrange_u64_hit_all",
        |col: &Arc<dyn ColumnValues<u64>>| {
            let mut positions = Vec::new();
            col.get_row_ids_for_value_range(0..=u64::MAX, 0..col.num_vals(), &mut positions);
            black_box(positions.len());
        },
    );
    group.run();
 }
 // Benchmark entry point: calls `bench_access()` and then `bench_range()`.
 fn main() {
    bench_access();
    bench_range();
 }
--- a/columnar/columnar-cli-inspect/Cargo.toml
+++ b/columnar/columnar-cli-inspect/Cargo.toml
@@ -1,18 +0,0 @@
 [package]
 name = "tantivy-columnar-inspect"
 version = "0.1.0"
 edition = "2021"
 license = "MIT"
 [dependencies]
 tantivy = {path="../..", package="tantivy"}
 columnar = {path="../", package="tantivy-columnar"}
 common = {path="../../common", package="tantivy-common"}
 [workspace]
 members = []
 [profile.release]
 debug = true
 #debug-assertions = true
 #overflow-checks = true
--- a/columnar/columnar-cli-inspect/src/main.rs
+++ b/columnar/columnar-cli-inspect/src/main.rs
@@ -1,54 +0,0 @@
 use columnar::ColumnarReader;
 use common::file_slice::{FileSlice, WrapFile};
 use std::io;
 use std::path::Path;
 use tantivy::directory::footer::Footer;
 /// Command-line entry point: opens the columnar file named by the first argument and
 /// validates it with `open_and_validate_columnar`.
 ///
 /// # Errors
 /// Returns an `InvalidInput` error when no path argument is given, and forwards any
 /// `io::Error` returned by `open_and_validate_columnar`.
 fn main() -> io::Result<()> {
    println!("Opens a columnar file written by tantivy and validates it.");
    // A missing argument is a usage error, not a bug: report it instead of panicking.
    let path_arg = std::env::args().nth(1).ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "missing argument: path to a columnar file",
        )
    })?;
    println!("Reading {:?}", Path::new(&path_arg));
    // The argument is already a `String`, so it is passed on without a `Path` round-trip.
    let _reader = open_and_validate_columnar(&path_arg)?;
    Ok(())
 }
 pub fn validate_columnar_reader(reader: &ColumnarReader) {
    let num_rows = reader.num_rows();
    println!("num_rows: {}", num_rows);
    let columns = reader.list_columns().unwrap();
    println!("num columns: {:?}", columns.len());
    for (col_name, dynamic_column_handle) in columns {
        let col = dynamic_column_handle.open().unwrap();
        match col {
            columnar::DynamicColumn::Bool(_)
            | columnar::DynamicColumn::I64(_)
            | columnar::DynamicColumn::U64(_)
            | columnar::DynamicColumn::F64(_)
            | columnar::DynamicColumn::IpAddr(_)
            | columnar::DynamicColumn::DateTime(_)
            | columnar::DynamicColumn::Bytes(_) => {}
            columnar::DynamicColumn::Str(str_column) => {
                let num_vals = str_column.ords().values.num_vals();
                let num_terms_dict = str_column.num_terms() as u64;
                let max_ord = str_column.ords().values.iter().max().unwrap_or_default();
                println!("{col_name:35}  num_vals {num_vals:10} \t num_terms_dict {num_terms_dict:8} max_ord: {max_ord:8}",);
                for ord in str_column.ords().values.iter() {
                    assert!(ord < num_terms_dict);
                }
            }
        }
    }
 }
 /// Opens a columnar file that was written by tantivy and validates it.
 ///
 /// The file at `path` is wrapped in a `FileSlice`, the tantivy footer is split off, and the
 /// remaining slice is opened as a `ColumnarReader`, which is then passed to
 /// `validate_columnar_reader` before being returned.
 ///
 /// # Errors
 /// Returns the `io::Error` raised while opening or wrapping the file.
 ///
 /// # Panics
 /// Panics if the footer cannot be extracted or the columnar data cannot be opened (both
 /// results are unwrapped), and whenever `validate_columnar_reader` panics.
 pub fn open_and_validate_columnar(path: &str) -> io::Result<ColumnarReader> {
    let wrap_file = WrapFile::new(std::fs::File::open(path)?)?;
    let slice = FileSlice::new(std::sync::Arc::new(wrap_file));
    // The full slice is not needed after the footer is split off, so it is moved, not cloned.
    let (_footer, slice) = Footer::extract_footer(slice).unwrap();
    let reader = ColumnarReader::open(slice).unwrap();
    validate_columnar_reader(&reader);
    Ok(reader)
 }
--- a/columnar/src/block_accessor.rs
+++ b/columnar/src/block_accessor.rs
@@ -29,20 +29,12 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
        }
    }
    #[inline]
-    pub fn fetch_block_with_missing(
+    pub fn fetch_block_with_missing(&mut self, docs: &[u32], accessor: &Column<T>, missing: T) {
        &mut self,
        docs: &[u32],
        accessor: &Column<T>,
        missing: Option<T>,
    ) {
        self.fetch_block(docs, accessor);
        // no missing values
        if accessor.index.get_cardinality().is_full() {
            return;
        }
        let Some(missing) = missing else {
            return;
        };
        // We can compare docid_cache length with docs to find missing docs
        // For multi value columns we can't rely on the length and always need to scan
@@ -74,7 +66,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
        &'a self,
        docs: &'a [u32],
        accessor: &Column<T>,
-    ) -> impl Iterator<Item = (DocId, T)> + 'a + use<'a, T> {
+    ) -> impl Iterator<Item = (DocId, T)> + '_ {
        if accessor.index.get_cardinality().is_full() {
            docs.iter().cloned().zip(self.val_cache.iter().cloned())
        } else {
@@ -147,7 +139,7 @@ mod tests {
            missing_docs.push(missing_doc);
        });
-        assert_eq!(missing_docs, Vec::<u32>::new());
+        assert_eq!(missing_docs, vec![]);
    }
    #[test]
--- a/columnar/src/column/dictionary_encoded.rs
+++ b/columnar/src/column/dictionary_encoded.rs
@@ -4,8 +4,8 @@ use std::{fmt, io};
 use sstable::{Dictionary, VoidSSTable};
 use crate::RowId;
 use crate::column::Column;
 use crate::RowId;
 /// Dictionary encoded column.
 ///
--- a/columnar/src/column/mod.rs
+++ b/columnar/src/column/mod.rs
@@ -9,14 +9,13 @@ use std::sync::Arc;
 use common::BinarySerializable;
 pub use dictionary_encoded::{BytesColumn, StrColumn};
 pub use serialize::{
-    open_column_bytes, open_column_str, open_column_u64, open_column_u128,
+    open_column_bytes, open_column_str, open_column_u128, open_column_u128_as_compact_u64,
-    open_column_u128_as_compact_u64, serialize_column_mappable_to_u64,
+    open_column_u64, serialize_column_mappable_to_u128, serialize_column_mappable_to_u64,
    serialize_column_mappable_to_u128,
 };
 use crate::column_index::{ColumnIndex, Set};
 use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
-use crate::column_values::{ColumnValues, monotonic_map_column};
+use crate::column_values::{monotonic_map_column, ColumnValues};
 use crate::{Cardinality, DocId, EmptyColumnValues, MonotonicallyMappableToU64, RowId};
 #[derive(Clone)]
@@ -85,8 +84,8 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
    }
    #[inline]
-    pub fn first(&self, doc_id: DocId) -> Option<T> {
+    pub fn first(&self, row_id: RowId) -> Option<T> {
-        self.values_for_doc(doc_id).next()
+        self.values_for_doc(row_id).next()
    }
    /// Load the first value for each docid in the provided slice.
@@ -114,7 +113,7 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
        }
    }
-    /// Translates a block of docids to row_ids.
+    /// Translates a block of docis to row_ids.
    ///
    /// returns the row_ids and the matching docids on the same index
    /// e.g.
@@ -131,8 +130,6 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
        self.index.docids_to_rowids(doc_ids, doc_ids_out, row_ids)
    }
    /// Get an iterator over the values for the provided docid.
    #[inline]
    pub fn values_for_doc(&self, doc_id: DocId) -> impl Iterator<Item = T> + '_ {
        self.index
            .value_row_ids(doc_id)
@@ -160,6 +157,15 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
            .select_batch_in_place(selected_docid_range.start, doc_ids);
    }
    /// Fills the output vector with the (possibly multiple) values that are associated with
    /// `row_id`.
    ///
    /// This method clears the `output` vector before filling it.
    pub fn fill_vals(&self, row_id: RowId, output: &mut Vec<T>) {
        output.clear();
        output.extend(self.values_for_doc(row_id));
    }
    pub fn first_or_default_col(self, default_value: T) -> Arc<dyn ColumnValues<T>> {
        Arc::new(FirstValueWithDefault {
            column: self,
--- a/columnar/src/column/serialize.rs
+++ b/columnar/src/column/serialize.rs
@@ -6,10 +6,10 @@ use common::OwnedBytes;
 use sstable::Dictionary;
 use crate::column::{BytesColumn, Column};
-use crate::column_index::{SerializableColumnIndex, serialize_column_index};
+use crate::column_index::{serialize_column_index, SerializableColumnIndex};
 use crate::column_values::{
    CodecType, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
    load_u64_based_column_values, serialize_column_values_u128, serialize_u64_based_column_values,
    CodecType, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
 };
 use crate::iterable::Iterable;
 use crate::{StrColumn, Version};
--- a/columnar/src/column_index/merge/mod.rs
+++ b/columnar/src/column_index/merge/mod.rs
@@ -99,9 +99,9 @@ mod tests {
    use crate::column_index::merge::detect_cardinality;
    use crate::column_index::multivalued_index::{
-        MultiValueIndex, open_multivalued_index, serialize_multivalued_index,
+        open_multivalued_index, serialize_multivalued_index, MultiValueIndex,
    };
-    use crate::column_index::{OptionalIndex, SerializableColumnIndex, merge_column_index};
+    use crate::column_index::{merge_column_index, OptionalIndex, SerializableColumnIndex};
    use crate::{
        Cardinality, ColumnIndex, MergeRowOrder, RowAddr, RowId, ShuffleMergeOrder, StackMergeOrder,
    };
--- a/columnar/src/column_index/merge/shuffled.rs
+++ b/columnar/src/column_index/merge/shuffled.rs
@@ -58,7 +58,7 @@ struct ShuffledIndex<'a> {
    merge_order: &'a ShuffleMergeOrder,
 }
-impl Iterable<u32> for ShuffledIndex<'_> {
+impl<'a> Iterable<u32> for ShuffledIndex<'a> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
        Box::new(
            self.merge_order
@@ -127,7 +127,7 @@ fn integrate_num_vals(num_vals: impl Iterator<Item = u32>) -> impl Iterator<Item
    )
 }
-impl Iterable<u32> for ShuffledMultivaluedIndex<'_> {
+impl<'a> Iterable<u32> for ShuffledMultivaluedIndex<'a> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
        let num_vals_per_row = iter_num_values(self.column_indexes, self.merge_order);
        Box::new(integrate_num_vals(num_vals_per_row))
@@ -137,8 +137,8 @@ impl Iterable<u32> for ShuffledMultivaluedIndex<'_> {
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::RowAddr;
    use crate::column_index::OptionalIndex;
    use crate::RowAddr;
    #[test]
    fn test_integrate_num_vals_empty() {
--- a/columnar/src/column_index/merge/stacked.rs
+++ b/columnar/src/column_index/merge/stacked.rs
@@ -1,8 +1,8 @@
 use std::ops::Range;
 use crate::column_index::SerializableColumnIndex;
 use crate::column_index::multivalued_index::{MultiValueIndex, SerializableMultivalueIndex};
 use crate::column_index::serialize::SerializableOptionalIndex;
 use crate::column_index::SerializableColumnIndex;
 use crate::iterable::Iterable;
 use crate::{Cardinality, ColumnIndex, RowId, StackMergeOrder};
@@ -56,7 +56,7 @@ fn get_doc_ids_with_values<'a>(
        ColumnIndex::Full => Box::new(doc_range),
        ColumnIndex::Optional(optional_index) => Box::new(
            optional_index
-                .iter_non_null_docs()
+                .iter_rows()
                .map(move |row| row + doc_range.start),
        ),
        ColumnIndex::Multivalued(multivalued_index) => match multivalued_index {
@@ -73,7 +73,7 @@ fn get_doc_ids_with_values<'a>(
            MultiValueIndex::MultiValueIndexV2(multivalued_index) => Box::new(
                multivalued_index
                    .optional_index
-                    .iter_non_null_docs()
+                    .iter_rows()
                    .map(move |row| row + doc_range.start),
            ),
        },
@@ -105,11 +105,10 @@ fn get_num_values_iterator<'a>(
 ) -> Box<dyn Iterator<Item = u32> + 'a> {
    match column_index {
        ColumnIndex::Empty { .. } => Box::new(std::iter::empty()),
-        ColumnIndex::Full => Box::new(std::iter::repeat_n(1u32, num_docs as usize)),
+        ColumnIndex::Full => Box::new(std::iter::repeat(1u32).take(num_docs as usize)),
-        ColumnIndex::Optional(optional_index) => Box::new(std::iter::repeat_n(
+        ColumnIndex::Optional(optional_index) => {
-            1u32,
+            Box::new(std::iter::repeat(1u32).take(optional_index.num_non_nulls() as usize))
-            optional_index.num_non_nulls() as usize,
+        }
        )),
        ColumnIndex::Multivalued(multivalued_index) => Box::new(
            multivalued_index
                .get_start_index_column()
@@ -124,7 +123,7 @@ fn get_num_values_iterator<'a>(
    }
 }
-impl Iterable<u32> for StackedStartOffsets<'_> {
+impl<'a> Iterable<u32> for StackedStartOffsets<'a> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
        let num_values_it = (0..self.column_indexes.len()).flat_map(|columnar_id| {
            let num_docs = self.stack_merge_order.columnar_range(columnar_id).len() as u32;
@@ -178,7 +177,7 @@ impl<'a> Iterable<RowId> for StackedOptionalIndex<'a> {
                        ColumnIndex::Full => Box::new(columnar_row_range),
                        ColumnIndex::Optional(optional_index) => Box::new(
                            optional_index
-                                .iter_non_null_docs()
+                                .iter_rows()
                                .map(move |row_id: RowId| columnar_row_range.start + row_id),
                        ),
                        ColumnIndex::Multivalued(_) => {
--- a/columnar/src/column_index/mod.rs
+++ b/columnar/src/column_index/mod.rs
@@ -14,7 +14,7 @@ pub use merge::merge_column_index;
 pub(crate) use multivalued_index::SerializableMultivalueIndex;
 pub use optional_index::{OptionalIndex, Set};
 pub use serialize::{
-    SerializableColumnIndex, SerializableOptionalIndex, open_column_index, serialize_column_index,
+    open_column_index, serialize_column_index, SerializableColumnIndex, SerializableOptionalIndex,
 };
 use crate::column_index::multivalued_index::MultiValueIndex;
--- a/columnar/src/column_index/multivalued_index.rs
+++ b/columnar/src/column_index/multivalued_index.rs
@@ -8,7 +8,7 @@ use common::{CountingWriter, OwnedBytes};
 use super::optional_index::{open_optional_index, serialize_optional_index};
 use super::{OptionalIndex, SerializableOptionalIndex, Set};
 use crate::column_values::{
-    CodecType, ColumnValues, load_u64_based_column_values, serialize_u64_based_column_values,
+    load_u64_based_column_values, serialize_u64_based_column_values, CodecType, ColumnValues,
 };
 use crate::iterable::Iterable;
 use crate::{DocId, RowId, Version};
@@ -215,32 +215,6 @@ impl MultiValueIndex {
        }
    }
    /// Returns an iterator over the ids of the documents that hold at least one value,
    /// in increasing order.
    pub fn iter_non_null_docs(&self) -> Box<dyn Iterator<Item = DocId> + '_> {
        match self {
            MultiValueIndex::MultiValueIndexV2(idx) => {
                Box::new(idx.optional_index.iter_non_null_docs())
            }
            MultiValueIndex::MultiValueIndexV1(idx) => {
                // Legacy format, not optimised: probe every doc and keep those whose
                // [start, end) value range is non-empty.
                let has_values = move |doc: &DocId| {
                    let start = idx.start_index_column.get_val(*doc);
                    let end = idx.start_index_column.get_val(*doc + 1);
                    end > start
                };
                Box::new((0u32..idx.num_docs()).filter(has_values))
            }
        }
    }
    /// Converts a list of ranks (row ids of values) in a 1:n index to the corresponding list of
    /// docids. Positions are converted inplace to docids.
    ///
--- a/columnar/src/column_index/optional_index/mod.rs
+++ b/columnar/src/column_index/optional_index/mod.rs
@@ -1,4 +1,4 @@
-use std::io;
+use std::io::{self, Write};
 use std::sync::Arc;
 mod set;
@@ -7,11 +7,11 @@ mod set_block;
 use common::{BinarySerializable, OwnedBytes, VInt};
 pub use set::{SelectCursor, Set, SetCodec};
 use set_block::{
-    DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec,
+    DenseBlock, DenseBlockCodec, SparseBlock, SparseBlockCodec, DENSE_BLOCK_NUM_BYTES,
 };
 use crate::iterable::Iterable;
-use crate::{DocId, RowId};
+use crate::{DocId, InvalidData, RowId};
 /// The threshold for the number of elements after which we switch to dense block encoding.
 ///
@@ -80,23 +80,23 @@ impl BlockVariant {
 /// index is the block index. For each block `byte_start` and `offset` is computed.
 #[derive(Clone)]
 pub struct OptionalIndex {
-    num_docs: RowId,
+    num_rows: RowId,
-    num_non_null_docs: RowId,
+    num_non_null_rows: RowId,
    block_data: OwnedBytes,
    block_metas: Arc<[BlockMeta]>,
 }
-impl Iterable<u32> for &OptionalIndex {
+impl<'a> Iterable<u32> for &'a OptionalIndex {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
-        Box::new(self.iter_non_null_docs())
+        Box::new(self.iter_rows())
    }
 }
 impl std::fmt::Debug for OptionalIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.debug_struct("OptionalIndex")
-            .field("num_docs", &self.num_docs)
+            .field("num_rows", &self.num_rows)
-            .field("num_non_null_docs", &self.num_non_null_docs)
+            .field("num_non_null_rows", &self.num_non_null_rows)
            .finish_non_exhaustive()
    }
 }
@@ -123,7 +123,7 @@ enum BlockSelectCursor<'a> {
    Sparse(<SparseBlock<'a> as Set<u16>>::SelectCursor<'a>),
 }
-impl BlockSelectCursor<'_> {
+impl<'a> BlockSelectCursor<'a> {
    fn select(&mut self, rank: u16) -> u16 {
        match self {
            BlockSelectCursor::Dense(dense_select_cursor) => dense_select_cursor.select(rank),
@@ -141,7 +141,7 @@ pub struct OptionalIndexSelectCursor<'a> {
    num_null_rows_before_block: RowId,
 }
-impl OptionalIndexSelectCursor<'_> {
+impl<'a> OptionalIndexSelectCursor<'a> {
    fn search_and_load_block(&mut self, rank: RowId) {
        if rank < self.current_block_end_rank {
            // we are already in the right block
@@ -165,7 +165,7 @@ impl OptionalIndexSelectCursor<'_> {
    }
 }
-impl SelectCursor<RowId> for OptionalIndexSelectCursor<'_> {
+impl<'a> SelectCursor<RowId> for OptionalIndexSelectCursor<'a> {
    fn select(&mut self, rank: RowId) -> RowId {
        self.search_and_load_block(rank);
        let index_in_block = (rank - self.num_null_rows_before_block) as u16;
@@ -259,13 +259,11 @@ impl Set<RowId> for OptionalIndex {
 impl OptionalIndex {
    pub fn for_test(num_rows: RowId, row_ids: &[RowId]) -> OptionalIndex {
-        assert!(
+        assert!(row_ids
-            row_ids
+            .last()
-                .last()
+            .copied()
-                .copied()
+            .map(|last_row_id| last_row_id < num_rows)
-                .map(|last_row_id| last_row_id < num_rows)
+            .unwrap_or(true));
                .unwrap_or(true)
        );
        let mut buffer = Vec::new();
        serialize_optional_index(&row_ids, num_rows, &mut buffer).unwrap();
        let bytes = OwnedBytes::new(buffer);
@@ -273,18 +271,17 @@ impl OptionalIndex {
    }
    pub fn num_docs(&self) -> RowId {
-        self.num_docs
+        self.num_rows
    }
    pub fn num_non_nulls(&self) -> RowId {
-        self.num_non_null_docs
+        self.num_non_null_rows
    }
-    pub fn iter_non_null_docs(&self) -> impl Iterator<Item = RowId> + '_ {
+    pub fn iter_rows(&self) -> impl Iterator<Item = RowId> + '_ {
-        // TODO optimize. We could iterate over the blocks directly.
+        // TODO optimize
        // We use the dense value ids and retrieve the doc ids via select.
        let mut select_batch = self.select_cursor();
-        (0..self.num_non_null_docs).map(move |rank| select_batch.select(rank))
+        (0..self.num_non_null_rows).map(move |rank| select_batch.select(rank))
    }
    pub fn select_batch(&self, ranks: &mut [RowId]) {
        let mut select_cursor = self.select_cursor();
@@ -335,6 +332,38 @@ enum Block<'a> {
    Sparse(SparseBlock<'a>),
 }
 /// Codec tag for the optional index, with a stable one-byte code per variant.
 ///
 /// The explicit discriminants are the codes returned by `to_code` and accepted by
 /// `try_from_code`.
 #[derive(Debug, Copy, Clone)]
 enum OptionalIndexCodec {
    /// Serialized as code `0`.
    Dense = 0,
    /// Serialized as code `1`.
    Sparse = 1,
 }
 impl OptionalIndexCodec {
    /// Returns the one-byte code of this codec, i.e. its enum discriminant.
    fn to_code(self) -> u8 {
        self as u8
    }
    /// Maps a one-byte code back to the codec carrying that code.
    ///
    /// Returns `InvalidData` when no variant uses `code`.
    fn try_from_code(code: u8) -> Result<Self, InvalidData> {
        [Self::Dense, Self::Sparse]
            .iter()
            .copied()
            .find(|codec| codec.to_code() == code)
            .ok_or(InvalidData)
    }
 }
 impl BinarySerializable for OptionalIndexCodec {
    /// Writes the codec as a single byte (see `to_code`).
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        let code = [self.to_code()];
        writer.write_all(&code)
    }
    /// Reads one byte and converts it back into a codec.
    ///
    /// Fails with an `io::Error` if the byte cannot be read or is not a known code.
    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let code = u8::deserialize(reader)?;
        Ok(Self::try_from_code(code)?)
    }
 }
 fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -> io::Result<()> {
    let is_sparse = is_sparse(block_els.len() as u32);
    if is_sparse {
@@ -476,7 +505,7 @@ fn deserialize_optional_index_block_metadatas(
        non_null_rows_before_block += num_non_null_rows;
    }
    block_metas.resize(
-        num_rows.div_ceil(ELEMENTS_PER_BLOCK) as usize,
+        ((num_rows + ELEMENTS_PER_BLOCK - 1) / ELEMENTS_PER_BLOCK) as usize,
        BlockMeta {
            non_null_rows_before_block,
            start_byte_offset,
@@ -490,15 +519,15 @@ pub fn open_optional_index(bytes: OwnedBytes) -> io::Result<OptionalIndex> {
    let (mut bytes, num_non_empty_blocks_bytes) = bytes.rsplit(2);
    let num_non_empty_block_bytes =
        u16::from_le_bytes(num_non_empty_blocks_bytes.as_slice().try_into().unwrap());
-    let num_docs = VInt::deserialize_u64(&mut bytes)? as u32;
+    let num_rows = VInt::deserialize_u64(&mut bytes)? as u32;
    let block_metas_num_bytes =
        num_non_empty_block_bytes as usize * SERIALIZED_BLOCK_META_NUM_BYTES;
    let (block_data, block_metas) = bytes.rsplit(block_metas_num_bytes);
-    let (block_metas, num_non_null_docs) =
+    let (block_metas, num_non_null_rows) =
-        deserialize_optional_index_block_metadatas(block_metas.as_slice(), num_docs);
+        deserialize_optional_index_block_metadatas(block_metas.as_slice(), num_rows);
    let optional_index = OptionalIndex {
-        num_docs,
+        num_rows,
-        num_non_null_docs,
+        num_non_null_rows,
        block_data,
        block_metas: block_metas.into(),
    };
--- a/columnar/src/column_index/optional_index/set_block/dense.rs
+++ b/columnar/src/column_index/optional_index/set_block/dense.rs
@@ -2,7 +2,7 @@ use std::io::{self, Write};
 use common::BinarySerializable;
-use crate::column_index::optional_index::{ELEMENTS_PER_BLOCK, SelectCursor, Set, SetCodec};
+use crate::column_index::optional_index::{SelectCursor, Set, SetCodec, ELEMENTS_PER_BLOCK};
 #[inline(always)]
 fn get_bit_at(input: u64, n: u16) -> bool {
@@ -23,6 +23,7 @@ fn set_bit_at(input: &mut u64, n: u16) {
 ///
 /// When translating a dense index to the original index, we can use the offset to find the correct
 /// block. Direct computation is not possible, but we can employ a linear or binary search.
 const ELEMENTS_PER_MINI_BLOCK: u16 = 64;
 const MINI_BLOCK_BITVEC_NUM_BYTES: usize = 8;
 const MINI_BLOCK_OFFSET_NUM_BYTES: usize = 2;
@@ -108,7 +109,7 @@ pub struct DenseBlockSelectCursor<'a> {
    dense_block: DenseBlock<'a>,
 }
-impl SelectCursor<u16> for DenseBlockSelectCursor<'_> {
+impl<'a> SelectCursor<u16> for DenseBlockSelectCursor<'a> {
    #[inline]
    fn select(&mut self, rank: u16) -> u16 {
        self.block_id = self
@@ -174,7 +175,7 @@ impl<'a> Set<u16> for DenseBlock<'a> {
    }
 }
-impl DenseBlock<'_> {
+impl<'a> DenseBlock<'a> {
    #[inline]
    fn mini_block(&self, mini_block_id: u16) -> DenseMiniBlock {
        let data_start_pos = mini_block_id as usize * MINI_BLOCK_NUM_BYTES;
--- a/columnar/src/column_index/optional_index/set_block/mod.rs
+++ b/columnar/src/column_index/optional_index/set_block/mod.rs
@@ -1,7 +1,7 @@
 mod dense;
 mod sparse;
-pub use dense::{DENSE_BLOCK_NUM_BYTES, DenseBlock, DenseBlockCodec};
+pub use dense::{DenseBlock, DenseBlockCodec, DENSE_BLOCK_NUM_BYTES};
 pub use sparse::{SparseBlock, SparseBlockCodec};
 #[cfg(test)]
--- a/columnar/src/column_index/optional_index/set_block/sparse.rs
+++ b/columnar/src/column_index/optional_index/set_block/sparse.rs
@@ -31,7 +31,7 @@ impl<'a> SelectCursor<u16> for SparseBlock<'a> {
    }
 }
-impl Set<u16> for SparseBlock<'_> {
+impl<'a> Set<u16> for SparseBlock<'a> {
    type SelectCursor<'b>
        = Self
    where Self: 'b;
@@ -69,7 +69,7 @@ fn get_u16(data: &[u8], byte_position: usize) -> u16 {
    u16::from_le_bytes(bytes)
 }
-impl SparseBlock<'_> {
+impl<'a> SparseBlock<'a> {
    #[inline(always)]
    fn value_at_idx(&self, data: &[u8], idx: u16) -> u16 {
        let start_offset: usize = idx as usize * 2;
@@ -82,7 +82,7 @@ impl SparseBlock<'_> {
    }
    #[inline]
-    #[expect(clippy::comparison_chain)]
+    #[allow(clippy::comparison_chain)]
    // Looks for the element in the block. Returns the positions if found.
    fn binary_search(&self, target: u16) -> Result<u16, u16> {
        let data = &self.0;
--- a/columnar/src/column_index/optional_index/tests.rs
+++ b/columnar/src/column_index/optional_index/tests.rs
@@ -164,11 +164,7 @@ fn test_optional_index_large() {
 fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
    let optional_index = OptionalIndex::for_test(num_rows, row_ids);
    assert_eq!(optional_index.num_docs(), num_rows);
-    assert!(
+    assert!(optional_index.iter_rows().eq(row_ids.iter().copied()));
        optional_index
            .iter_non_null_docs()
            .eq(row_ids.iter().copied())
    );
 }
 #[test]
@@ -223,3 +219,174 @@ fn test_optional_index_for_tests() {
    assert!(!optional_index.contains(3));
    assert_eq!(optional_index.num_docs(), 4);
 }
 #[cfg(all(test, feature = "unstable"))]
 mod bench {
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
    use test::Bencher;
    use super::*;
    const TOTAL_NUM_VALUES: u32 = 1_000_000;
    /// Builds an `OptionalIndex` over `TOTAL_NUM_VALUES` rows in which each row is
    /// non-null with probability `fill_ratio`, using a fixed RNG seed.
    fn gen_bools(fill_ratio: f64) -> OptionalIndex {
        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
        // One RNG draw per row, in row order; keep the rows whose draw came out `true`.
        let mut non_null_rows: Vec<RowId> = Vec::new();
        for row_id in 0..TOTAL_NUM_VALUES {
            if rng.gen_bool(fill_ratio) {
                non_null_rows.push(row_id as RowId);
            }
        }
        let mut serialized = Vec::new();
        serialize_optional_index(&non_null_rows.as_slice(), TOTAL_NUM_VALUES, &mut serialized)
            .unwrap();
        open_optional_index(OwnedBytes::new(serialized)).unwrap()
    }
    /// Iterator of positions that begins at `start` and advances by a random step on
    /// every call.
    ///
    /// Each step is drawn from
    /// `avg_step_size - avg_deviation..=avg_step_size + avg_deviation` with a fixed RNG
    /// seed. The position is yielded after the step has been added; iteration ends as
    /// soon as the position reaches `end`.
    fn random_range_iterator(
        start: u32,
        end: u32,
        avg_step_size: u32,
        avg_deviation: u32,
    ) -> impl Iterator<Item = u32> {
        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
        let mut position = start;
        std::iter::from_fn(move || {
            let step_bounds = avg_step_size - avg_deviation..=avg_step_size + avg_deviation;
            position += rng.gen_range(step_bounds);
            (position < end).then_some(position)
        })
    }
    /// Random positions in `0..num_values` whose average step is `100 / percent`
    /// (truncated to an integer).
    fn n_percent_step_iterator(percent: f32, num_values: u32) -> impl Iterator<Item = u32> {
        let step_size = (1f32 / (percent / 100.0)) as u32;
        // A deviation of `step_size - 1` makes the steps range over `1..=2 * step_size - 1`.
        let max_deviation = step_size - 1;
        random_range_iterator(0, num_values, step_size, max_deviation)
    }
    /// Probes `codec` at fixed intervals of `avg_step_size` rows (zero deviation) across
    /// `0..TOTAL_NUM_VALUES`.
    fn walk_over_data(codec: &OptionalIndex, avg_step_size: u32) -> Option<u32> {
        let positions = random_range_iterator(0, TOTAL_NUM_VALUES, avg_step_size, 0);
        walk_over_data_from_positions(codec, positions)
    }
    /// Calls `rank_if_exists` for every position and returns the first `Some` result,
    /// or `None` if there was none.
    fn walk_over_data_from_positions(
        codec: &OptionalIndex,
        positions: impl Iterator<Item = u32>,
    ) -> Option<u32> {
        // `Option::or` takes its argument by value, so the lookup still runs for every
        // position after a first hit has been recorded.
        positions.fold(None, |first_hit, idx| first_hit.or(codec.rank_if_exists(idx)))
    }
    // Fill ratio 0.01; probes every 100th row id.
    #[bench]
    fn bench_translate_orig_to_codec_1percent_filled_10percent_hit(bench: &mut Bencher) {
        let codec = gen_bools(0.01f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }
    // Fill ratio 0.05; probes every 100th row id.
    #[bench]
    fn bench_translate_orig_to_codec_5percent_filled_10percent_hit(bench: &mut Bencher) {
        let codec = gen_bools(0.05f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }
    // Fill ratio 0.05; probes every 1000th row id.
    #[bench]
    fn bench_translate_orig_to_codec_5percent_filled_1percent_hit(bench: &mut Bencher) {
        let codec = gen_bools(0.05f64);
        bench.iter(|| walk_over_data(&codec, 1000));
    }
    // Fill ratio 0.01; probes every row id in `0..TOTAL_NUM_VALUES`.
    #[bench]
    fn bench_translate_orig_to_codec_full_scan_1percent_filled(bench: &mut Bencher) {
        let codec = gen_bools(0.01f64);
        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
    }
    // Fill ratio 0.1; probes every row id in `0..TOTAL_NUM_VALUES`.
    #[bench]
    fn bench_translate_orig_to_codec_full_scan_10percent_filled(bench: &mut Bencher) {
        let codec = gen_bools(0.1f64);
        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
    }
    // Fill ratio 0.9; probes every row id in `0..TOTAL_NUM_VALUES`.
    #[bench]
    fn bench_translate_orig_to_codec_full_scan_90percent_filled(bench: &mut Bencher) {
        let codec = gen_bools(0.9f64);
        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
    }
    // Fill ratio 0.1; probes every 100th row id.
    #[bench]
    fn bench_translate_orig_to_codec_10percent_filled_1percent_hit(bench: &mut Bencher) {
        let codec = gen_bools(0.1f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }
    // Fill ratio 0.5; probes every 100th row id.
    #[bench]
    fn bench_translate_orig_to_codec_50percent_filled_1percent_hit(bench: &mut Bencher) {
        let codec = gen_bools(0.5f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }
    // Fill ratio 0.9; probes every 100th row id.
    #[bench]
    fn bench_translate_orig_to_codec_90percent_filled_1percent_hit(bench: &mut Bencher) {
        let codec = gen_bools(0.9f64);
        bench.iter(|| walk_over_data(&codec, 100));
    }
    // Fill ratio 0.01; `percent_hit` = 0.005.
    #[bench]
    fn bench_translate_codec_to_orig_1percent_filled_0comma005percent_hit(bench: &mut Bencher) {
        bench_translate_codec_to_orig_util(0.01f64, 0.005f32, bench);
    }
    // Fill ratio 0.1; `percent_hit` = 0.005.
    #[bench]
    fn bench_translate_codec_to_orig_10percent_filled_0comma005percent_hit(bench: &mut Bencher) {
        bench_translate_codec_to_orig_util(0.1f64, 0.005f32, bench);
    }
    // Fill ratio 0.01; `percent_hit` = 10.
    #[bench]
    fn bench_translate_codec_to_orig_1percent_filled_10percent_hit(bench: &mut Bencher) {
        bench_translate_codec_to_orig_util(0.01f64, 10f32, bench);
    }
    // Fill ratio 0.01; `percent_hit` = 100, i.e. every rank is selected.
    #[bench]
    fn bench_translate_codec_to_orig_1percent_filled_full_scan(bench: &mut Bencher) {
        bench_translate_codec_to_orig_util(0.01f64, 100f32, bench);
    }
    /// Benchmarks `select_batch` on an index generated with `percent_filled`.
    ///
    /// With `percent_hit == 100` every rank in `0..num_non_nulls` is selected; otherwise
    /// the ranks come from `n_percent_step_iterator(percent_hit, ..)`.
    fn bench_translate_codec_to_orig_util(
        percent_filled: f64,
        percent_hit: f32,
        bench: &mut Bencher,
    ) {
        let codec = gen_bools(percent_filled);
        let num_non_nulls = codec.num_non_nulls();
        let ranks: Vec<u32> = if percent_hit != 100.0f32 {
            n_percent_step_iterator(percent_hit, num_non_nulls).collect()
        } else {
            (0..num_non_nulls).collect()
        };
        // `select_batch` works in place, so the ranks are copied back into the scratch
        // buffer before every iteration.
        let mut scratch = vec![0u32; ranks.len()];
        bench.iter(|| {
            scratch.copy_from_slice(ranks.as_slice());
            codec.select_batch(&mut scratch);
        });
    }
    // Fill ratio 0.9; `percent_hit` = 0.005.
    #[bench]
    fn bench_translate_codec_to_orig_90percent_filled_0comma005percent_hit(bench: &mut Bencher) {
        bench_translate_codec_to_orig_util(0.9f64, 0.005, bench);
    }
    // Fill ratio 0.9; `percent_hit` = 100, i.e. every rank is selected.
    #[bench]
    fn bench_translate_codec_to_orig_90percent_filled_full_scan(bench: &mut Bencher) {
        bench_translate_codec_to_orig_util(0.9f64, 100.0f32, bench);
    }
 }
--- a/columnar/src/column_index/serialize.rs
+++ b/columnar/src/column_index/serialize.rs
@@ -3,11 +3,11 @@ use std::io::Write;
 use common::{CountingWriter, OwnedBytes};
 use super::OptionalIndex;
 use super::multivalued_index::SerializableMultivalueIndex;
-use crate::column_index::ColumnIndex;
+use super::OptionalIndex;
 use crate::column_index::multivalued_index::serialize_multivalued_index;
 use crate::column_index::optional_index::serialize_optional_index;
 use crate::column_index::ColumnIndex;
 use crate::iterable::Iterable;
 use crate::{Cardinality, RowId, Version};
@@ -31,7 +31,7 @@ pub enum SerializableColumnIndex<'a> {
    Multivalued(SerializableMultivalueIndex<'a>),
 }
-impl SerializableColumnIndex<'_> {
+impl<'a> SerializableColumnIndex<'a> {
    pub fn get_cardinality(&self) -> Cardinality {
        match self {
            SerializableColumnIndex::Full => Cardinality::Full,
--- a/columnar/src/column_values/bench.rs
+++ b/columnar/src/column_values/bench.rs
@@ -0,0 +1,139 @@
 use std::sync::Arc;
 use common::OwnedBytes;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use test::{self, Bencher};
 use super::*;
 use crate::column_values::u64_based::*;
 /// Benchmark input: the values `100..55000`, each with a random `u8` added (fixed
 /// seed), followed by `99_000`, with a handful of outliers inserted afterwards.
 fn get_data() -> Vec<u64> {
    let mut rng = StdRng::seed_from_u64(2u64);
    let mut values: Vec<u64> = Vec::new();
    for base in 100..55000_u64 {
        values.push(base + rng.gen::<u8>() as u64);
    }
    values.push(99_000);
    // Every insert shifts the tail of the vector, so the order of these pairs matters.
    let outliers: [(usize, u64); 5] = [
        (1000, 2000),
        (2000, 100),
        (3000, 4100),
        (4000, 100),
        (5000, 800),
    ];
    for (index, outlier) in outliers {
        values.insert(index, outlier);
    }
    values
 }
 /// Feeds every value into a fresh `StatsCollector` and returns the resulting stats.
 fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
    let mut collector = StatsCollector::default();
    vals.for_each(|val| {
        collector.collect(val);
    });
    collector.stats()
 }
 /// Positions read by the `get` benchmarks: `0..20_000`.
 #[inline(never)]
 fn value_iter() -> impl Iterator<Item = u64> {
    let first_pos: u64 = 0;
    let end_pos: u64 = 20_000;
    first_pos..end_pos
 }
 /// Serializes `data` with `Codec` into an in-memory buffer and loads a reader from it.
 fn get_reader_for_bench<Codec: ColumnCodec>(data: &[u64]) -> Codec::ColumnValues {
    let stats = compute_stats(data.iter().cloned());
    // The estimator sees every value before serialization.
    let mut estimator = Codec::estimator();
    data.iter().for_each(|val| {
        estimator.collect(*val);
    });
    let mut serialized = Vec::new();
    let mut values = data.iter().copied();
    estimator
        .serialize(&stats, &mut values, &mut serialized)
        .unwrap();
    Codec::load(OwnedBytes::new(serialized)).unwrap()
 }
 /// Benchmarks `get_val` on the concrete reader type of `Codec`, over every position
 /// of `value_iter()`.
 fn bench_get<Codec: ColumnCodec>(b: &mut Bencher, data: &[u64]) {
    let col = get_reader_for_bench::<Codec>(data);
    b.iter(|| {
        // The wrapping sum is returned from the closure so the reads feed a result.
        value_iter().fold(0u64, |sum, pos| sum.wrapping_add(col.get_val(pos as u32)))
    });
 }
 /// Benchmarks `get_val` through an `Arc<dyn ColumnValues>`, over every position of
 /// `value_iter()`.
 #[inline(never)]
 fn bench_get_dynamic_helper(b: &mut Bencher, col: Arc<dyn ColumnValues>) {
    b.iter(|| {
        // The wrapping sum is returned from the closure so the reads feed a result.
        value_iter().fold(0u64, |sum, pos| sum.wrapping_add(col.get_val(pos as u32)))
    });
 }
 fn bench_get_dynamic<Codec: ColumnCodec>(b: &mut Bencher, data: &[u64]) {
    let col = Arc::new(get_reader_for_bench::<Codec>(data));
    bench_get_dynamic_helper(b, col);
 }
 /// Benchmarks estimator construction plus serialization of `data` with `Codec`.
 ///
 /// The stats are computed once, outside the measured closure. The output buffer is
 /// reused and cleared on every iteration.
 fn bench_create<Codec: ColumnCodec>(b: &mut Bencher, data: &[u64]) {
    let stats = compute_stats(data.iter().cloned());
    let mut out = Vec::new();
    b.iter(|| {
        out.clear();
        let mut estimator = Codec::estimator();
        // Only the first 1024 values are fed to the estimator.
        data.iter().take(1024).for_each(|val| {
            estimator.collect(*val);
        });
        let mut values = data.iter().copied();
        estimator.serialize(&stats, &mut values, &mut out)
    });
 }
 // Serialization benchmark for `BitpackedCodec`.
 #[bench]
 fn bench_fastfield_bitpack_create(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_create::<BitpackedCodec>(b, &data);
 }
 // Serialization benchmark for `LinearCodec`.
 #[bench]
 fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_create::<LinearCodec>(b, &data);
 }
 // Serialization benchmark for `BlockwiseLinearCodec`.
 #[bench]
 fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_create::<BlockwiseLinearCodec>(b, &data);
 }
 // Read benchmark for `BitpackedCodec` on its concrete reader type.
 #[bench]
 fn bench_fastfield_bitpack_get(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_get::<BitpackedCodec>(b, &data);
 }
 // Read benchmark for `BitpackedCodec` through `Arc<dyn ColumnValues>`.
 #[bench]
 fn bench_fastfield_bitpack_get_dynamic(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_get_dynamic::<BitpackedCodec>(b, &data);
 }
 // Read benchmark for `LinearCodec` on its concrete reader type.
 #[bench]
 fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_get::<LinearCodec>(b, &data);
 }
 // Read benchmark for `LinearCodec` through `Arc<dyn ColumnValues>`.
 #[bench]
 fn bench_fastfield_linearinterpol_get_dynamic(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_get_dynamic::<LinearCodec>(b, &data);
 }
 // Read benchmark for `BlockwiseLinearCodec` on its concrete reader type.
 #[bench]
 fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_get::<BlockwiseLinearCodec>(b, &data);
 }
 // Read benchmark for `BlockwiseLinearCodec` through `Arc<dyn ColumnValues>`.
 #[bench]
 fn bench_fastfield_multilinearinterpol_get_dynamic(b: &mut Bencher) {
    let data: Vec<_> = get_data();
    bench_get_dynamic::<BlockwiseLinearCodec>(b, &data);
 }
--- a/columnar/src/column_values/merge.rs
+++ b/columnar/src/column_values/merge.rs
@@ -10,7 +10,7 @@ pub(crate) struct MergedColumnValues<'a, T> {
    pub(crate) merge_row_order: &'a MergeRowOrder,
 }
-impl<T: Copy + PartialOrd + Debug + 'static> Iterable<T> for MergedColumnValues<'_, T> {
+impl<'a, T: Copy + PartialOrd + Debug + 'static> Iterable<T> for MergedColumnValues<'a, T> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
        match self.merge_row_order {
            MergeRowOrder::Stack(_) => Box::new(
--- a/columnar/src/column_values/mod.rs
+++ b/columnar/src/column_values/mod.rs
@@ -26,13 +26,13 @@ mod monotonic_column;
 pub(crate) use merge::MergedColumnValues;
 pub use stats::ColumnStats;
 pub use u64_based::{
    ALL_U64_CODEC_TYPES, CodecType, load_u64_based_column_values,
    serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
 };
 pub use u128_based::{
-    CompactSpaceU64Accessor, open_u128_as_compact_u64, open_u128_mapped,
+    open_u128_as_compact_u64, open_u128_mapped, serialize_column_values_u128,
-    serialize_column_values_u128,
+    CompactSpaceU64Accessor,
 };
 pub use u64_based::{
    load_u64_based_column_values, serialize_and_load_u64_based_column_values,
    serialize_u64_based_column_values, CodecType, ALL_U64_CODEC_TYPES,
 };
 pub use vec_column::VecColumn;
@@ -242,3 +242,6 @@ impl<T: Copy + PartialOrd + Debug + 'static> ColumnValues<T> for Arc<dyn ColumnV
            .get_row_ids_for_value_range(range, doc_id_range, positions)
    }
 }
 #[cfg(all(test, feature = "unstable"))]
 mod bench;
--- a/columnar/src/column_values/monotonic_column.rs
+++ b/columnar/src/column_values/monotonic_column.rs
@@ -2,8 +2,8 @@ use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::ops::{Range, RangeInclusive};
 use crate::ColumnValues;
 use crate::column_values::monotonic_mapping::StrictlyMonotonicFn;
 use crate::ColumnValues;
 struct MonotonicMappingColumn<C, T, Input> {
    from_column: C,
@@ -99,10 +99,10 @@ where
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::column_values::VecColumn;
    use crate::column_values::monotonic_mapping::{
        StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
    };
    use crate::column_values::VecColumn;
    #[test]
    fn test_monotonic_mapping_iter() {
--- a/columnar/src/column_values/monotonic_mapping_u128.rs
+++ b/columnar/src/column_values/monotonic_mapping_u128.rs
@@ -1,7 +1,7 @@
 use std::fmt::Debug;
 use std::net::Ipv6Addr;
-/// Monotonic maps a value to u128 value space
+/// Montonic maps a value to u128 value space
 /// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space.
 pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Debug + Send + Sync {
    /// Converts a value to u128.
--- a/columnar/src/column_values/u128_based/compact_space/build_compact_space.rs
+++ b/columnar/src/column_values/u128_based/compact_space/build_compact_space.rs
@@ -185,10 +185,10 @@ impl CompactSpaceBuilder {
        let mut covered_space = Vec::with_capacity(self.blanks.len());
        // beginning of the blanks
-        if let Some(first_blank_start) = self.blanks.first().map(RangeInclusive::start)
+        if let Some(first_blank_start) = self.blanks.first().map(RangeInclusive::start) {
-            && *first_blank_start != 0
+            if *first_blank_start != 0 {
-        {
+                covered_space.push(0..=first_blank_start - 1);
-            covered_space.push(0..=first_blank_start - 1);
+            }
        }
        // Between the blanks
@@ -202,10 +202,10 @@ impl CompactSpaceBuilder {
        covered_space.extend(between_blanks);
        // end of the blanks
-        if let Some(last_blank_end) = self.blanks.last().map(RangeInclusive::end)
+        if let Some(last_blank_end) = self.blanks.last().map(RangeInclusive::end) {
-            && *last_blank_end != u128::MAX
+            if *last_blank_end != u128::MAX {
-        {
+                covered_space.push(last_blank_end + 1..=u128::MAX);
-            covered_space.push(last_blank_end + 1..=u128::MAX);
+            }
        }
        if covered_space.is_empty() {
--- a/columnar/src/column_values/u128_based/compact_space/mod.rs
+++ b/columnar/src/column_values/u128_based/compact_space/mod.rs
@@ -24,8 +24,8 @@ use build_compact_space::get_compact_space;
 use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
 use tantivy_bitpacker::{BitPacker, BitUnpacker};
 use crate::RowId;
 use crate::column_values::ColumnValues;
 use crate::RowId;
 /// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
 /// blanks depends on the number of blanks.
@@ -653,14 +653,12 @@ mod tests {
            ),
            &[3]
        );
-        assert!(
+        assert!(get_positions_for_value_range_helper(
-            get_positions_for_value_range_helper(
+            &decomp,
-                &decomp,
+            99998u128..=99998u128,
-                99998u128..=99998u128,
+            complete_range.clone()
-                complete_range.clone()
+        )
-            )
+        .is_empty());
            .is_empty()
        );
        assert_eq!(
            &get_positions_for_value_range_helper(
                &decomp,
--- a/columnar/src/column_values/u128_based/mod.rs
+++ b/columnar/src/column_values/u128_based/mod.rs
@@ -128,13 +128,13 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn Col
 }
 #[cfg(test)]
-pub(crate) mod tests {
+pub mod tests {
    use super::*;
    use crate::column_values::CodecType;
    use crate::column_values::u64_based::{
-        ALL_U64_CODEC_TYPES, serialize_and_load_u64_based_column_values,
+        serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
-        serialize_u64_based_column_values,
+        ALL_U64_CODEC_TYPES,
    };
    use crate::column_values::CodecType;
    #[test]
    fn test_serialize_deserialize_u128_header() {
--- a/columnar/src/column_values/u64_based/bitpacked.rs
+++ b/columnar/src/column_values/u64_based/bitpacked.rs
@@ -4,7 +4,7 @@ use std::ops::{Range, RangeInclusive};
 use common::{BinarySerializable, OwnedBytes};
 use fastdivide::DividerU64;
-use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
 use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
 use crate::{ColumnValues, RowId};
@@ -23,7 +23,11 @@ const fn div_ceil(n: u64, q: NonZeroU64) -> u64 {
    // copied from unstable rust standard library.
    let d = n / q.get();
    let r = n % q.get();
-    if r > 0 { d + 1 } else { d }
+    if r > 0 {
        d + 1
    } else {
        d
    }
 }
 // The bitpacked codec applies a linear transformation `f` over data that are bitpacked.
@@ -41,6 +45,12 @@ fn transform_range_before_linear_transformation(
    if range.is_empty() {
        return None;
    }
    if stats.min_value > *range.end() {
        return None;
    }
    if stats.max_value < *range.start() {
        return None;
    }
    let shifted_range =
        range.start().saturating_sub(stats.min_value)..=range.end().saturating_sub(stats.min_value);
    let start_before_gcd_multiplication: u64 = div_ceil(*shifted_range.start(), stats.gcd);
@@ -99,7 +109,7 @@ impl ColumnCodecEstimator for BitpackedCodecEstimator {
    fn estimate(&self, stats: &ColumnStats) -> Option<u64> {
        let num_bits_per_value = num_bits(stats);
-        Some(stats.num_bytes() + (stats.num_rows as u64 * (num_bits_per_value as u64)).div_ceil(8))
+        Some(stats.num_bytes() + (stats.num_rows as u64 * (num_bits_per_value as u64) + 7) / 8)
    }
    fn serialize(
--- a/columnar/src/column_values/u64_based/blockwise_linear.rs
+++ b/columnar/src/column_values/u64_based/blockwise_linear.rs
@@ -4,12 +4,12 @@ use std::{io, iter};
 use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
 use fastdivide::DividerU64;
-use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
 use crate::MonotonicallyMappableToU64;
 use crate::column_values::u64_based::line::Line;
 use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
 use crate::column_values::{ColumnValues, VecColumn};
 use crate::MonotonicallyMappableToU64;
 const BLOCK_SIZE: u32 = 512u32;
@@ -39,7 +39,7 @@ impl BinarySerializable for Block {
 }
 fn compute_num_blocks(num_vals: u32) -> u32 {
-    num_vals.div_ceil(BLOCK_SIZE)
+    (num_vals + BLOCK_SIZE - 1) / BLOCK_SIZE
 }
 pub struct BlockwiseLinearEstimator {
--- a/columnar/src/column_values/u64_based/line.rs
+++ b/columnar/src/column_values/u64_based/line.rs
@@ -8,7 +8,7 @@ use crate::column_values::ColumnValues;
 const MID_POINT: u64 = (1u64 << 32) - 1u64;
 /// `Line` describes a line function `y: ax + b` using integer
-/// arithmetic.
+/// arithmetics.
 ///
 /// The slope is in fact a decimal split into a 32 bit integer value,
 /// and a 32-bit decimal value.
@@ -94,7 +94,7 @@ impl Line {
        // `(i, ys[])`.
        //
        // The best intercept therefore has the form
-        // `y[i] - line.eval(i)` (using wrapping arithmetic).
+        // `y[i] - line.eval(i)` (using wrapping arithmetics).
        // In other words, the best intercept is one of the `y - Line::eval(ys[i])`
        // and our task is just to pick the one that minimizes our error.
        //
--- a/columnar/src/column_values/u64_based/linear.rs
+++ b/columnar/src/column_values/u64_based/linear.rs
@@ -1,13 +1,13 @@
 use std::io;
 use common::{BinarySerializable, OwnedBytes};
-use tantivy_bitpacker::{BitPacker, BitUnpacker, compute_num_bits};
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
 use super::ColumnValues;
 use super::line::Line;
-use crate::RowId;
+use super::ColumnValues;
 use crate::column_values::VecColumn;
 use crate::column_values::u64_based::{ColumnCodec, ColumnCodecEstimator, ColumnStats};
 use crate::column_values::VecColumn;
 use crate::RowId;
 const HALF_SPACE: u64 = u64::MAX / 2;
 const LINE_ESTIMATION_BLOCK_LEN: usize = 512;
@@ -117,7 +117,7 @@ impl ColumnCodecEstimator for LinearCodecEstimator {
        Some(
            stats.num_bytes()
                + linear_params.num_bytes()
-                + (num_bits as u64 * stats.num_rows as u64).div_ceil(8),
+                + (num_bits as u64 * stats.num_rows as u64 + 7) / 8,
        )
    }
--- a/columnar/src/column_values/u64_based/mod.rs
+++ b/columnar/src/column_values/u64_based/mod.rs
@@ -17,7 +17,7 @@ pub use crate::column_values::u64_based::bitpacked::BitpackedCodec;
 pub use crate::column_values::u64_based::blockwise_linear::BlockwiseLinearCodec;
 pub use crate::column_values::u64_based::linear::LinearCodec;
 pub use crate::column_values::u64_based::stats_collector::StatsCollector;
-use crate::column_values::{ColumnStats, monotonic_map_column};
+use crate::column_values::{monotonic_map_column, ColumnStats};
 use crate::iterable::Iterable;
 use crate::{ColumnValues, MonotonicallyMappableToU64};
@@ -52,7 +52,7 @@ pub trait ColumnCodecEstimator<T = u64>: 'static {
    ) -> io::Result<()>;
 }
-/// A column codec describes a column serialization format.
+/// A column codec describes a colunm serialization format.
 pub trait ColumnCodec<T: PartialOrd = u64> {
    /// Specialized `ColumnValues` type.
    type ColumnValues: ColumnValues<T> + 'static;
--- a/columnar/src/column_values/u64_based/stats_collector.rs
+++ b/columnar/src/column_values/u64_based/stats_collector.rs
@@ -2,8 +2,8 @@ use std::num::NonZeroU64;
 use fastdivide::DividerU64;
 use crate::RowId;
 use crate::column_values::ColumnStats;
 use crate::RowId;
 /// Compute the gcd of two non null numbers.
 ///
@@ -96,8 +96,8 @@ impl StatsCollector {
 mod tests {
    use std::num::NonZeroU64;
    use crate::column_values::u64_based::stats_collector::{compute_gcd, StatsCollector};
    use crate::column_values::u64_based::ColumnStats;
    use crate::column_values::u64_based::stats_collector::{StatsCollector, compute_gcd};
    fn compute_stats(vals: impl Iterator<Item = u64>) -> ColumnStats {
        let mut stats_collector = StatsCollector::default();
--- a/columnar/src/column_values/u64_based/tests.rs
+++ b/columnar/src/column_values/u64_based/tests.rs
@@ -1,6 +1,5 @@
 use proptest::prelude::*;
 use proptest::{prop_oneof, proptest};
 use rand::Rng;
 #[test]
 fn test_serialize_and_load_simple() {
--- a/columnar/src/columnar/column_type.rs
+++ b/columnar/src/columnar/column_type.rs
@@ -4,8 +4,8 @@ use std::net::Ipv6Addr;
 use serde::{Deserialize, Serialize};
 use crate::InvalidData;
 use crate::value::NumericalType;
 use crate::InvalidData;
 /// The column type represents the column type.
 /// Any changes need to be propagated to `COLUMN_TYPES`.
--- a/columnar/src/columnar/merge/merge_dict_column.rs
+++ b/columnar/src/columnar/merge/merge_dict_column.rs
@@ -3,7 +3,7 @@ use std::io::{self, Write};
 use common::{BitSet, CountingWriter, ReadOnlyBitSet};
 use sstable::{SSTable, Streamer, TermOrdinal, VoidSSTable};
-use super::term_merger::{TermMerger, TermsWithSegmentOrd};
+use super::term_merger::TermMerger;
 use crate::column::serialize_column_mappable_to_u64;
 use crate::column_index::SerializableColumnIndex;
 use crate::iterable::Iterable;
@@ -39,7 +39,7 @@ struct RemappedTermOrdinalsValues<'a> {
    merge_row_order: &'a MergeRowOrder,
 }
-impl Iterable for RemappedTermOrdinalsValues<'_> {
+impl<'a> Iterable for RemappedTermOrdinalsValues<'a> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
        match self.merge_row_order {
            MergeRowOrder::Stack(_) => self.boxed_iter_stacked(),
@@ -50,7 +50,7 @@ impl Iterable for RemappedTermOrdinalsValues<'_> {
    }
 }
-impl RemappedTermOrdinalsValues<'_> {
+impl<'a> RemappedTermOrdinalsValues<'a> {
    fn boxed_iter_stacked(&self) -> Box<dyn Iterator<Item = u64> + '_> {
        let iter = self
            .bytes_columns
@@ -126,17 +126,14 @@ fn serialize_merged_dict(
    let mut term_ord_mapping = TermOrdinalMapping::default();
    let mut field_term_streams = Vec::new();
-    for (segment_ord, column_opt) in bytes_columns.iter().enumerate() {
+    for column_opt in bytes_columns.iter() {
        if let Some(column) = column_opt {
            term_ord_mapping.add_segment(column.dictionary.num_terms());
            let terms: Streamer<VoidSSTable> = column.dictionary.stream()?;
-            field_term_streams.push(TermsWithSegmentOrd { terms, segment_ord });
+            field_term_streams.push(terms);
        } else {
            term_ord_mapping.add_segment(0);
-            field_term_streams.push(TermsWithSegmentOrd {
+            field_term_streams.push(Streamer::empty());
                terms: Streamer::empty(),
                segment_ord,
            });
        }
    }
@@ -194,7 +191,6 @@ fn serialize_merged_dict(
 /// Per-segment tables of term ordinals.
 ///
 /// The outer vector is indexed by segment ordinal (this is how `get_segment`
 /// reads it); each inner vector holds the new term ordinals of that segment.
 #[derive(Default, Debug)]
 struct TermOrdinalMapping {
    /// Contains the new term ordinals for each segment.
    per_segment_new_term_ordinals: Vec<Vec<TermOrdinal>>,
 }
@@ -209,6 +205,6 @@ impl TermOrdinalMapping {
    }
    fn get_segment(&self, segment_ord: u32) -> &[TermOrdinal] {
-        &self.per_segment_new_term_ordinals[segment_ord as usize]
+        &(self.per_segment_new_term_ordinals[segment_ord as usize])[..]
    }
 }
--- a/columnar/src/columnar/merge/merge_mapping.rs
+++ b/columnar/src/columnar/merge/merge_mapping.rs
@@ -26,7 +26,7 @@ impl StackMergeOrder {
        let mut cumulated_row_ids: Vec<RowId> = Vec::with_capacity(columnars.len());
        let mut cumulated_row_id = 0;
        for columnar in columnars {
-            cumulated_row_id += columnar.num_docs();
+            cumulated_row_id += columnar.num_rows();
            cumulated_row_ids.push(cumulated_row_id);
        }
        StackMergeOrder { cumulated_row_ids }
--- a/columnar/src/columnar/merge/mod.rs
+++ b/columnar/src/columnar/merge/mod.rs
@@ -10,11 +10,11 @@ use std::sync::Arc;
 pub use merge_mapping::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
 use super::writer::ColumnarSerializer;
-use crate::column::{serialize_column_mappable_to_u64, serialize_column_mappable_to_u128};
+use crate::column::{serialize_column_mappable_to_u128, serialize_column_mappable_to_u64};
 use crate::column_values::MergedColumnValues;
 use crate::columnar::ColumnarReader;
 use crate::columnar::merge::merge_dict_column::merge_bytes_or_str_column;
 use crate::columnar::writer::CompatibleNumericalTypes;
 use crate::columnar::ColumnarReader;
 use crate::dynamic_column::DynamicColumn;
 use crate::{
    BytesColumn, Column, ColumnIndex, ColumnType, ColumnValues, DynamicColumnHandle, NumericalType,
@@ -80,12 +80,13 @@ pub fn merge_columnar(
    output: &mut impl io::Write,
 ) -> io::Result<()> {
    let mut serializer = ColumnarSerializer::new(output);
-    let num_docs_per_columnar = columnar_readers
+    let num_rows_per_columnar = columnar_readers
        .iter()
-        .map(|reader| reader.num_docs())
+        .map(|reader| reader.num_rows())
        .collect::<Vec<u32>>();
-    let columns_to_merge = group_columns_for_merge(columnar_readers, required_columns)?;
+    let columns_to_merge =
        group_columns_for_merge(columnar_readers, required_columns, &merge_row_order)?;
    for res in columns_to_merge {
        let ((column_name, _column_type_category), grouped_columns) = res;
        let grouped_columns = grouped_columns.open(&merge_row_order)?;
@@ -93,18 +94,15 @@ pub fn merge_columnar(
            continue;
        }
-        let column_type_after_merge = grouped_columns.column_type_after_merge();
+        let column_type = grouped_columns.column_type_after_merge();
        let mut columns = grouped_columns.columns;
-        // Make sure the number of columns is the same as the number of columnar readers.
+        coerce_columns(column_type, &mut columns)?;
        // Or num_docs_per_columnar would be incorrect.
        assert_eq!(columns.len(), columnar_readers.len());
        coerce_columns(column_type_after_merge, &mut columns)?;
        let mut column_serializer =
-            serializer.start_serialize_column(column_name.as_bytes(), column_type_after_merge);
+            serializer.start_serialize_column(column_name.as_bytes(), column_type);
        merge_column(
-            column_type_after_merge,
+            column_type,
-            &num_docs_per_columnar,
+            &num_rows_per_columnar,
            columns,
            &merge_row_order,
            &mut column_serializer,
@@ -130,7 +128,7 @@ fn dynamic_column_to_u64_monotonic(dynamic_column: DynamicColumn) -> Option<Colu
 fn merge_column(
    column_type: ColumnType,
    num_docs_per_column: &[u32],
-    columns_to_merge: Vec<Option<DynamicColumn>>,
+    columns: Vec<Option<DynamicColumn>>,
    merge_row_order: &MergeRowOrder,
    wrt: &mut impl io::Write,
 ) -> io::Result<()> {
@@ -140,21 +138,20 @@ fn merge_column(
        | ColumnType::F64
        | ColumnType::DateTime
        | ColumnType::Bool => {
-            let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns_to_merge.len());
+            let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns.len());
            let mut column_values: Vec<Option<Arc<dyn ColumnValues>>> =
-                Vec::with_capacity(columns_to_merge.len());
+                Vec::with_capacity(columns.len());
-            for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
+            for (i, dynamic_column_opt) in columns.into_iter().enumerate() {
-                match dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic) {
+                if let Some(Column { index: idx, values }) =
-                    Some(Column { index: idx, values }) => {
+                    dynamic_column_opt.and_then(dynamic_column_to_u64_monotonic)
-                        column_indexes.push(idx);
+                {
-                        column_values.push(Some(values));
+                    column_indexes.push(idx);
-                    }
+                    column_values.push(Some(values));
-                    None => {
+                } else {
-                        column_indexes.push(ColumnIndex::Empty {
+                    column_indexes.push(ColumnIndex::Empty {
-                            num_docs: num_docs_per_column[i],
+                        num_docs: num_docs_per_column[i],
-                        });
+                    });
-                        column_values.push(None);
+                    column_values.push(None);
                    }
                }
            }
            let merged_column_index =
@@ -167,10 +164,10 @@ fn merge_column(
            serialize_column_mappable_to_u64(merged_column_index, &merge_column_values, wrt)?;
        }
        ColumnType::IpAddr => {
-            let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns_to_merge.len());
+            let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns.len());
            let mut column_values: Vec<Option<Arc<dyn ColumnValues<Ipv6Addr>>>> =
-                Vec::with_capacity(columns_to_merge.len());
+                Vec::with_capacity(columns.len());
-            for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
+            for (i, dynamic_column_opt) in columns.into_iter().enumerate() {
                if let Some(DynamicColumn::IpAddr(Column { index: idx, values })) =
                    dynamic_column_opt
                {
@@ -195,10 +192,9 @@ fn merge_column(
            serialize_column_mappable_to_u128(merged_column_index, &merge_column_values, wrt)?;
        }
        ColumnType::Bytes | ColumnType::Str => {
-            let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns_to_merge.len());
+            let mut column_indexes: Vec<ColumnIndex> = Vec::with_capacity(columns.len());
-            let mut bytes_columns: Vec<Option<BytesColumn>> =
+            let mut bytes_columns: Vec<Option<BytesColumn>> = Vec::with_capacity(columns.len());
-                Vec::with_capacity(columns_to_merge.len());
+            for (i, dynamic_column_opt) in columns.into_iter().enumerate() {
            for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() {
                match dynamic_column_opt {
                    Some(DynamicColumn::Str(str_column)) => {
                        column_indexes.push(str_column.term_ord_column.index.clone());
@@ -252,15 +248,13 @@ impl GroupedColumns {
        if column_type.len() == 1 {
            return column_type.into_iter().next().unwrap();
        }
-        // At the moment, only the numerical column type category has more than one possible
+        // At the moment, only the numerical categorical column type has more than one possible
        // column type.
-        assert!(
+        assert!(self
-            self.columns
+            .columns
-                .iter()
+            .iter()
-                .flatten()
+            .flatten()
-                .all(|el| ColumnTypeCategory::from(el.column_type())
+            .all(|el| ColumnTypeCategory::from(el.column_type()) == ColumnTypeCategory::Numerical));
                    == ColumnTypeCategory::Numerical)
        );
        merged_numerical_columns_type(self.columns.iter().flatten()).into()
    }
 }
@@ -367,7 +361,7 @@ fn is_empty_after_merge(
                    ColumnIndex::Empty { .. } => true,
                    ColumnIndex::Full => alive_bitset.len() == 0,
                    ColumnIndex::Optional(optional_index) => {
-                        for doc in optional_index.iter_non_null_docs() {
+                        for doc in optional_index.iter_rows() {
                            if alive_bitset.contains(doc) {
                                return false;
                            }
@@ -397,6 +391,7 @@ fn is_empty_after_merge(
 fn group_columns_for_merge<'a>(
    columnar_readers: &'a [&'a ColumnarReader],
    required_columns: &'a [(String, ColumnType)],
    _merge_row_order: &'a MergeRowOrder,
 ) -> io::Result<BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle>> {
    let mut columns: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> = BTreeMap::new();
--- a/columnar/src/columnar/merge/term_merger.rs
+++ b/columnar/src/columnar/merge/term_merger.rs
@@ -5,29 +5,28 @@ use sstable::TermOrdinal;
 use crate::Streamer;
-/// The terms of a column with the ordinal of the segment.
+pub struct HeapItem<'a> {
-pub struct TermsWithSegmentOrd<'a> {
+    pub streamer: Streamer<'a>,
    pub terms: Streamer<'a>,
    pub segment_ord: usize,
 }
-impl PartialEq for TermsWithSegmentOrd<'_> {
+impl<'a> PartialEq for HeapItem<'a> {
    fn eq(&self, other: &Self) -> bool {
        self.segment_ord == other.segment_ord
    }
 }
-impl Eq for TermsWithSegmentOrd<'_> {}
+impl<'a> Eq for HeapItem<'a> {}
-impl<'a> PartialOrd for TermsWithSegmentOrd<'a> {
+impl<'a> PartialOrd for HeapItem<'a> {
-    fn partial_cmp(&self, other: &TermsWithSegmentOrd<'a>) -> Option<Ordering> {
+    fn partial_cmp(&self, other: &HeapItem<'a>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }
-impl<'a> Ord for TermsWithSegmentOrd<'a> {
+impl<'a> Ord for HeapItem<'a> {
-    fn cmp(&self, other: &TermsWithSegmentOrd<'a>) -> Ordering {
+    fn cmp(&self, other: &HeapItem<'a>) -> Ordering {
-        (&other.terms.key(), &other.segment_ord).cmp(&(&self.terms.key(), &self.segment_ord))
+        (&other.streamer.key(), &other.segment_ord).cmp(&(&self.streamer.key(), &self.segment_ord))
    }
 }
@@ -38,32 +37,39 @@ impl<'a> Ord for TermsWithSegmentOrd<'a> {
 /// - the term
 /// - a slice with the ordinal of the segments containing the terms.
 pub struct TermMerger<'a> {
-    heap: BinaryHeap<TermsWithSegmentOrd<'a>>,
+    heap: BinaryHeap<HeapItem<'a>>,
-    term_streams_with_segment: Vec<TermsWithSegmentOrd<'a>>,
+    current_streamers: Vec<HeapItem<'a>>,
 }
 impl<'a> TermMerger<'a> {
    /// Stream of merged term dictionary
-    pub fn new(term_streams_with_segment: Vec<TermsWithSegmentOrd<'a>>) -> TermMerger<'a> {
+    pub fn new(streams: Vec<Streamer<'a>>) -> TermMerger<'a> {
        TermMerger {
            heap: BinaryHeap::new(),
-            term_streams_with_segment,
+            current_streamers: streams
                .into_iter()
                .enumerate()
                .map(|(ord, streamer)| HeapItem {
                    streamer,
                    segment_ord: ord,
                })
                .collect(),
        }
    }
    pub(crate) fn matching_segments<'b: 'a>(
        &'b self,
    ) -> impl 'b + Iterator<Item = (usize, TermOrdinal)> {
-        self.term_streams_with_segment
+        self.current_streamers
            .iter()
-            .map(|heap_item| (heap_item.segment_ord, heap_item.terms.term_ord()))
+            .map(|heap_item| (heap_item.segment_ord, heap_item.streamer.term_ord()))
    }
    fn advance_segments(&mut self) {
-        let streamers = &mut self.term_streams_with_segment;
+        let streamers = &mut self.current_streamers;
        let heap = &mut self.heap;
        for mut heap_item in streamers.drain(..) {
-            if heap_item.terms.advance() {
+            if heap_item.streamer.advance() {
                heap.push(heap_item);
            }
        }
@@ -74,19 +80,18 @@ impl<'a> TermMerger<'a> {
    /// False if there is none.
    pub fn advance(&mut self) -> bool {
        self.advance_segments();
-        match self.heap.pop() {
+        if let Some(head) = self.heap.pop() {
-            Some(head) => {
+            self.current_streamers.push(head);
-                self.term_streams_with_segment.push(head);
+            while let Some(next_streamer) = self.heap.peek() {
-                while let Some(next_streamer) = self.heap.peek() {
+                if self.current_streamers[0].streamer.key() != next_streamer.streamer.key() {
-                    if self.term_streams_with_segment[0].terms.key() != next_streamer.terms.key() {
+                    break;
                        break;
                    }
                    let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
                    self.term_streams_with_segment.push(next_heap_it);
                }
-                true
+                let next_heap_it = self.heap.pop().unwrap(); // safe : we peeked beforehand
                self.current_streamers.push(next_heap_it);
            }
-            _ => false,
+            true
        } else {
            false
        }
    }
@@ -96,6 +101,6 @@ impl<'a> TermMerger<'a> {
    /// if and only if advance() has been called before
    /// and "true" was returned.
    pub fn key(&self) -> &[u8] {
-        self.term_streams_with_segment[0].terms.key()
+        self.current_streamers[0].streamer.key()
    }
 }
--- a/columnar/src/columnar/merge/tests.rs
+++ b/columnar/src/columnar/merge/tests.rs
@@ -1,10 +1,7 @@
 use itertools::Itertools;
 use proptest::collection::vec;
 use proptest::prelude::*;
 use super::*;
-use crate::columnar::{ColumnarReader, MergeRowOrder, StackMergeOrder, merge_columnar};
+use crate::{Cardinality, ColumnarWriter, HasAssociatedColumnType, RowId};
 use crate::{Cardinality, ColumnarWriter, DynamicColumn, HasAssociatedColumnType, RowId};
 fn make_columnar<T: Into<NumericalValue> + HasAssociatedColumnType + Copy>(
    column_name: &str,
@@ -29,8 +26,9 @@ fn test_column_coercion_to_u64() {
    // u64 type
    let columnar2 = make_columnar("numbers", &[u64::MAX]);
    let columnars = &[&columnar1, &columnar2];
    let merge_order = StackMergeOrder::stack(columnars).into();
    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
-        group_columns_for_merge(columnars, &[]).unwrap();
+        group_columns_for_merge(columnars, &[], &merge_order).unwrap();
    assert_eq!(column_map.len(), 1);
    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
@@ -40,8 +38,9 @@ fn test_column_coercion_to_i64() {
    let columnar1 = make_columnar("numbers", &[-1i64]);
    let columnar2 = make_columnar("numbers", &[2u64]);
    let columnars = &[&columnar1, &columnar2];
    let merge_order = StackMergeOrder::stack(columnars).into();
    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
-        group_columns_for_merge(columnars, &[]).unwrap();
+        group_columns_for_merge(columnars, &[], &merge_order).unwrap();
    assert_eq!(column_map.len(), 1);
    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
@@ -64,8 +63,14 @@ fn test_group_columns_with_required_column() {
    let columnar1 = make_columnar("numbers", &[1i64]);
    let columnar2 = make_columnar("numbers", &[2u64]);
    let columnars = &[&columnar1, &columnar2];
    let merge_order = StackMergeOrder::stack(columnars).into();
    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
-        group_columns_for_merge(columnars, &[("numbers".to_string(), ColumnType::U64)]).unwrap();
+        group_columns_for_merge(
            &[&columnar1, &columnar2],
            &[("numbers".to_string(), ColumnType::U64)],
            &merge_order,
        )
        .unwrap();
    assert_eq!(column_map.len(), 1);
    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
@@ -75,9 +80,13 @@ fn test_group_columns_required_column_with_no_existing_columns() {
    let columnar1 = make_columnar("numbers", &[2u64]);
    let columnar2 = make_columnar("numbers", &[2u64]);
    let columnars = &[&columnar1, &columnar2];
-    let column_map: BTreeMap<_, _> =
+    let merge_order = StackMergeOrder::stack(columnars).into();
-        group_columns_for_merge(columnars, &[("required_col".to_string(), ColumnType::Str)])
+    let column_map: BTreeMap<_, _> = group_columns_for_merge(
-            .unwrap();
+        columnars,
        &[("required_col".to_string(), ColumnType::Str)],
        &merge_order,
    )
    .unwrap();
    assert_eq!(column_map.len(), 2);
    let columns = &column_map
        .get(&("required_col".to_string(), ColumnTypeCategory::Str))
@@ -93,8 +102,14 @@ fn test_group_columns_required_column_is_above_all_columns_have_the_same_type_ru
    let columnar1 = make_columnar("numbers", &[2i64]);
    let columnar2 = make_columnar("numbers", &[2i64]);
    let columnars = &[&columnar1, &columnar2];
    let merge_order = StackMergeOrder::stack(columnars).into();
    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
-        group_columns_for_merge(columnars, &[("numbers".to_string(), ColumnType::U64)]).unwrap();
+        group_columns_for_merge(
            columnars,
            &[("numbers".to_string(), ColumnType::U64)],
            &merge_order,
        )
        .unwrap();
    assert_eq!(column_map.len(), 1);
    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
 }
@@ -104,8 +119,9 @@ fn test_missing_column() {
    let columnar1 = make_columnar("numbers", &[-1i64]);
    let columnar2 = make_columnar("numbers2", &[2u64]);
    let columnars = &[&columnar1, &columnar2];
    let merge_order = StackMergeOrder::stack(columnars).into();
    let column_map: BTreeMap<(String, ColumnTypeCategory), GroupedColumnsHandle> =
-        group_columns_for_merge(columnars, &[]).unwrap();
+        group_columns_for_merge(columnars, &[], &merge_order).unwrap();
    assert_eq!(column_map.len(), 2);
    assert!(column_map.contains_key(&("numbers".to_string(), ColumnTypeCategory::Numerical)));
    {
@@ -208,7 +224,7 @@ fn test_merge_columnar_numbers() {
    )
    .unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_docs(), 3);
+    assert_eq!(columnar_reader.num_rows(), 3);
    assert_eq!(columnar_reader.num_columns(), 1);
    let cols = columnar_reader.read_columns("numbers").unwrap();
    let dynamic_column = cols[0].open().unwrap();
@@ -236,7 +252,7 @@ fn test_merge_columnar_texts() {
    )
    .unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_docs(), 3);
+    assert_eq!(columnar_reader.num_rows(), 3);
    assert_eq!(columnar_reader.num_columns(), 1);
    let cols = columnar_reader.read_columns("texts").unwrap();
    let dynamic_column = cols[0].open().unwrap();
@@ -285,7 +301,7 @@ fn test_merge_columnar_byte() {
    )
    .unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_docs(), 4);
+    assert_eq!(columnar_reader.num_rows(), 4);
    assert_eq!(columnar_reader.num_columns(), 1);
    let cols = columnar_reader.read_columns("bytes").unwrap();
    let dynamic_column = cols[0].open().unwrap();
@@ -341,7 +357,7 @@ fn test_merge_columnar_byte_with_missing() {
    )
    .unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_docs(), 3 + 2 + 3);
+    assert_eq!(columnar_reader.num_rows(), 3 + 2 + 3);
    assert_eq!(columnar_reader.num_columns(), 2);
    let cols = columnar_reader.read_columns("col").unwrap();
    let dynamic_column = cols[0].open().unwrap();
@@ -393,7 +409,7 @@ fn test_merge_columnar_different_types() {
    )
    .unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_docs(), 4);
+    assert_eq!(columnar_reader.num_rows(), 4);
    assert_eq!(columnar_reader.num_columns(), 2);
    let cols = columnar_reader.read_columns("mixed").unwrap();
@@ -403,11 +419,11 @@ fn test_merge_columnar_different_types() {
        panic!()
    };
    assert_eq!(vals.get_cardinality(), Cardinality::Optional);
-    assert_eq!(vals.values_for_doc(0).collect_vec(), Vec::<i64>::new());
+    assert_eq!(vals.values_for_doc(0).collect_vec(), vec![]);
-    assert_eq!(vals.values_for_doc(1).collect_vec(), Vec::<i64>::new());
+    assert_eq!(vals.values_for_doc(1).collect_vec(), vec![]);
-    assert_eq!(vals.values_for_doc(2).collect_vec(), Vec::<i64>::new());
+    assert_eq!(vals.values_for_doc(2).collect_vec(), vec![]);
    assert_eq!(vals.values_for_doc(3).collect_vec(), vec![1]);
-    assert_eq!(vals.values_for_doc(4).collect_vec(), Vec::<i64>::new());
+    assert_eq!(vals.values_for_doc(4).collect_vec(), vec![]);
    // text column
    let dynamic_column = cols[1].open().unwrap();
@@ -458,7 +474,7 @@ fn test_merge_columnar_different_empty_cardinality() {
    )
    .unwrap();
    let columnar_reader = ColumnarReader::open(buffer).unwrap();
-    assert_eq!(columnar_reader.num_docs(), 2);
+    assert_eq!(columnar_reader.num_rows(), 2);
    assert_eq!(columnar_reader.num_columns(), 2);
    let cols = columnar_reader.read_columns("mixed").unwrap();
@@ -470,119 +486,3 @@ fn test_merge_columnar_different_empty_cardinality() {
    let dynamic_column = cols[1].open().unwrap();
    assert_eq!(dynamic_column.get_cardinality(), Cardinality::Optional);
 }
 /// Test-only description of a single bytes column: its name and the terms
 /// that `build_columnar` records into it.
 #[derive(Debug, Clone)]
 struct ColumnSpec {
    /// Column name handed to `ColumnarWriter::record_bytes`.
    column_name: String,
    /// (row_id, term)
    ///
    /// `column_spec_strategy` emits these pairs sorted by ascending row_id.
    terms: Vec<(RowId, Vec<u8>)>,
 }
 /// Test-only description of one whole columnar, turned into a
 /// `ColumnarReader` by `build_columnar`.
 #[derive(Clone, Debug)]
 struct ColumnarSpec {
    /// The columns to record, in the order they are written.
    columns: Vec<ColumnSpec>,
 }
 /// Generate a random (row_id, term) pair:
 ///  - row_id is drawn from `0u32..11`, i.e. 0 to 10 inclusive
 ///  - term is either one of POSSIBLE_TERMS or random bytes
 fn rowid_and_term_strategy() -> impl Strategy<Value = (RowId, Vec<u8>)> {
    const POSSIBLE_TERMS: &[&[u8]] = &[b"a", b"b", b"allo"];
    let term_strat = prop_oneof![
        // pick from the fixed list: generate an index, then copy that term into an owned Vec
        (0..POSSIBLE_TERMS.len()).prop_map(|i| POSSIBLE_TERMS[i].to_vec()),
        // or random bytes; the size range `0..10` is half-open, so 0 to 9 bytes
        prop::collection::vec(any::<u8>(), 0..10),
    ];
    // The tuple of both strategies is the strategy for the (row_id, term) pair.
    (0u32..11, term_strat)
 }
 /// Generate one ColumnSpec, with a random name and a random list of (row_id, term).
 /// We sort it by row_id so that data is in ascending order.
 fn column_spec_strategy() -> impl Strategy<Value = ColumnSpec> {
    // Two fixed names plus a generated one, so that separately generated
    // columns can end up sharing a name.
    let column_name = prop_oneof![
        Just("col".to_string()),
        Just("col2".to_string()),
        // A string literal used as a strategy is interpreted by proptest as a
        // regex: any generated name starts with "col".
        "col.*".prop_map(|s| s),
    ];
    // We'll produce between 0 and 7 (rowid,term) entries for this column
    // (the size range `0..8` is half-open).
    let data_strat = vec(rowid_and_term_strategy(), 0..8).prop_map(|mut pairs| {
        // Sort by row_id; the sort is stable, so pairs sharing a row_id keep
        // the order in which they were generated.
        pairs.sort_by_key(|(row_id, _)| *row_id);
        pairs
    });
    (column_name, data_strat).prop_map(|(name, data)| ColumnSpec {
        column_name: name,
        terms: data,
    })
 }
 /// Strategy producing a `ColumnarSpec` made of zero to two generated columns
 /// (the size range `0..3` is half-open).
 fn columnar_strategy() -> impl Strategy<Value = ColumnarSpec> {
    let column_specs = vec(column_spec_strategy(), 0..3);
    column_specs.prop_map(|specs| ColumnarSpec { columns: specs })
 }
 /// Strategy producing one to three `ColumnarSpec`s. Each spec stands for a
 /// separate "columnar"; the generated group is meant to be merged together.
 fn columnars_strategy() -> impl Strategy<Value = Vec<ColumnarSpec>> {
    let single_columnar = columnar_strategy();
    vec(single_columnar, 1..4)
 }
 /// Build a `ColumnarReader` from a `ColumnarSpec`
 fn build_columnar(spec: &ColumnarSpec) -> ColumnarReader {
    let mut writer = ColumnarWriter::default();
    let mut max_row_id = 0;
    for col in &spec.columns {
        for &(row_id, ref term) in &col.terms {
            writer.record_bytes(row_id, &col.column_name, term);
            max_row_id = max_row_id.max(row_id);
        }
    }
    let mut buffer = Vec::new();
    writer.serialize(max_row_id + 1, &mut buffer).unwrap();
    ColumnarReader::open(buffer).unwrap()
 }
 proptest! {
    // We just test that the merge_columnar function doesn't crash.
    // Each run generates 256 cases; every case draws two independent groups
    // of columnar specs.
    #![proptest_config(ProptestConfig::with_cases(256))]
    #[test]
    fn test_merge_columnar_bytes_no_crash(columnars in columnars_strategy(), second_merge_columnars in columnars_strategy()) {
        // First merge: build one reader per spec of the first group and
        // stack-merge them.
        let columnars: Vec<ColumnarReader> = columnars.iter()
            .map(build_columnar)
            .collect();
        let mut out = Vec::new();
        let columnar_refs: Vec<&ColumnarReader> = columnars.iter().collect();
        let stack_merge_order = StackMergeOrder::stack(&columnar_refs);
        // `&[]`: no required columns are requested.
        merge_columnar(
            &columnar_refs,
            &[],
            MergeRowOrder::Stack(stack_merge_order),
            &mut out,
        ).unwrap();
        // The output of the first merge must itself open as a columnar.
        let merged_reader = ColumnarReader::open(out).unwrap();
        // Merge the second set of columnars with the result of the first merge
        let mut columnars: Vec<ColumnarReader> = second_merge_columnars.iter()
            .map(build_columnar)
            .collect();
        // The previously merged columnar goes last in the stack order.
        columnars.push(merged_reader);
        let mut out = Vec::new();
        let columnar_refs: Vec<&ColumnarReader> = columnars.iter().collect();
        let stack_merge_order = StackMergeOrder::stack(&columnar_refs);
        // Only success of the second merge is checked; its output is not reopened.
        merge_columnar(
            &columnar_refs,
            &[],
            MergeRowOrder::Stack(stack_merge_order),
            &mut out,
        ).unwrap();
    }
 }
--- a/columnar/src/columnar/mod.rs
+++ b/columnar/src/columnar/mod.rs
@@ -5,9 +5,9 @@ mod reader;
 mod writer;
 pub use column_type::{ColumnType, HasAssociatedColumnType};
-pub use format_version::{CURRENT_VERSION, Version};
+pub use format_version::{Version, CURRENT_VERSION};
 #[cfg(test)]
 pub(crate) use merge::ColumnTypeCategory;
-pub use merge::{MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, merge_columnar};
+pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
 pub use reader::ColumnarReader;
 pub use writer::ColumnarWriter;
--- a/columnar/src/columnar/reader/mod.rs
+++ b/columnar/src/columnar/reader/mod.rs
@@ -1,11 +1,10 @@
 use std::{fmt, io, mem};
 use common::BinarySerializable;
 use common::file_slice::FileSlice;
-use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
+use common::BinarySerializable;
 use sstable::{Dictionary, RangeSSTable};
-use crate::columnar::{ColumnType, format_version};
+use crate::columnar::{format_version, ColumnType};
 use crate::dynamic_column::DynamicColumnHandle;
 use crate::{RowId, Version};
@@ -19,13 +18,13 @@ fn io_invalid_data(msg: String) -> io::Error {
 pub struct ColumnarReader {
    column_dictionary: Dictionary<RangeSSTable>,
    column_data: FileSlice,
-    num_docs: RowId,
+    num_rows: RowId,
    format_version: Version,
 }
 impl fmt::Debug for ColumnarReader {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let num_rows = self.num_docs();
+        let num_rows = self.num_rows();
        let columns = self.list_columns().unwrap();
        let num_cols = columns.len();
        let mut debug_struct = f.debug_struct("Columnar");
@@ -77,19 +76,6 @@ fn read_all_columns_in_stream(
    Ok(results)
 }
 /// Returns the dictionary key prefix shared by all columns named `column_name`.
 ///
 /// Each column is associated with a `column_key` of the form
 /// `column_name\0column_header`, so listing the columns for a given name is
 /// the same as listing the keys that start with `column_name\0`.
 fn column_dictionary_prefix_for_column_name(column_name: &str) -> String {
    let mut prefix = String::from(column_name);
    // The NUL byte separates the column name from the column header.
    prefix.push('\0');
    prefix
 }
 /// Returns `root_path` followed by the JSON path segment separator, i.e. the
 /// key prefix used to look up the columns nested below `root_path`.
 fn column_dictionary_prefix_for_subpath(root_path: &str) -> String {
    let mut prefix = String::from(root_path);
    prefix.push(JSON_PATH_SEGMENT_SEP as char);
    prefix
 }
 impl ColumnarReader {
    /// Opens a new Columnar file.
    pub fn open<F>(file_slice: F) -> io::Result<ColumnarReader>
@@ -112,13 +98,13 @@ impl ColumnarReader {
        Ok(ColumnarReader {
            column_dictionary,
            column_data,
-            num_docs: num_rows,
+            num_rows,
            format_version,
        })
    }
-    pub fn num_docs(&self) -> RowId {
+    pub fn num_rows(&self) -> RowId {
-        self.num_docs
+        self.num_rows
    }
    // Iterate over the columns in a sorted way
    pub fn iter_columns(
@@ -158,14 +144,32 @@ impl ColumnarReader {
        Ok(self.iter_columns()?.collect())
    }
    /// Builds a range query over the column dictionary that covers the keys of
    /// the columns named `column_name`.
    ///
    /// Each column is associated with a `column_key` of the form
    /// `column_name\0column_header`. Listing the columns for a given name is
    /// therefore equivalent to listing the keys prefixed by `column_name\0`,
    /// which in turn is the half-open key range `[column_name\0, column_name\1)`.
    fn stream_for_column_range(&self, column_name: &str) -> sstable::StreamerBuilder<RangeSSTable> {
        // TODO can we get some more generic `prefix(..)` logic in the dictionary.
        let lower_bound = format!("{}\0", column_name);
        let upper_bound = format!("{}\u{1}", column_name);
        self.column_dictionary
            .range()
            .ge(lower_bound.as_bytes())
            .lt(upper_bound.as_bytes())
    }
    pub async fn read_columns_async(
        &self,
        column_name: &str,
    ) -> io::Result<Vec<DynamicColumnHandle>> {
        let prefix = column_dictionary_prefix_for_column_name(column_name);
        let stream = self
-            .column_dictionary
+            .stream_for_column_range(column_name)
            .prefix_range(prefix)
            .into_stream_async()
            .await?;
        read_all_columns_in_stream(stream, &self.column_data, self.format_version)
@@ -176,35 +180,7 @@ impl ColumnarReader {
    /// There can be more than one column associated to a given column name, provided they have
    /// different types.
    pub fn read_columns(&self, column_name: &str) -> io::Result<Vec<DynamicColumnHandle>> {
-        let prefix = column_dictionary_prefix_for_column_name(column_name);
+        let stream = self.stream_for_column_range(column_name).into_stream()?;
        let stream = self.column_dictionary.prefix_range(prefix).into_stream()?;
        read_all_columns_in_stream(stream, &self.column_data, self.format_version)
    }
    /// Asynchronous variant of [`Self::read_subpath_columns`].
    ///
    /// Computes the dictionary prefix for `root_path`, asynchronously streams the dictionary
    /// entries matching that prefix, and returns the columns read from that stream.
    ///
    /// # Errors
    /// Propagates any I/O error raised while opening the stream or reading the columns.
    pub async fn read_subpath_columns_async(
        &self,
        root_path: &str,
    ) -> io::Result<Vec<DynamicColumnHandle>> {
        let prefix = column_dictionary_prefix_for_subpath(root_path);
        let stream = self
            .column_dictionary
            .prefix_range(prefix)
            .into_stream_async()
            .await?;
        read_all_columns_in_stream(stream, &self.column_data, self.format_version)
    }
    /// Gets all inner columns for a given JSON prefix, i.e. the columns whose name starts
    /// with `root_path` immediately followed by the [`JSON_PATH_SEGMENT_SEP`].
    ///
    /// There can be more than one column associated with each path within the JSON structure,
    /// provided they have different types.
    ///
    /// # Errors
    /// Propagates any I/O error raised while opening the stream or reading the columns.
    pub fn read_subpath_columns(&self, root_path: &str) -> io::Result<Vec<DynamicColumnHandle>> {
        let prefix = column_dictionary_prefix_for_subpath(root_path);
        let stream = self
            .column_dictionary
            .prefix_range(prefix.as_bytes())
            .into_stream()?;
        read_all_columns_in_stream(stream, &self.column_data, self.format_version)
    }
@@ -216,8 +192,6 @@ impl ColumnarReader {
 #[cfg(test)]
 mod tests {
    use common::json_path_writer::JSON_PATH_SEGMENT_SEP;
    use crate::{ColumnType, ColumnarReader, ColumnarWriter};
    #[test]
@@ -250,64 +224,6 @@ mod tests {
        assert_eq!(columns[0].1.column_type(), ColumnType::U64);
    }
    // Checks that `read_columns` returns the single column recorded under a name,
    // and nothing for a name that was never recorded.
    #[test]
    fn test_read_columns() {
        let mut columnar_writer = ColumnarWriter::default();
        columnar_writer.record_column_type("col", ColumnType::U64, false);
        columnar_writer.record_numerical(1, "col", 1u64);
        let mut buffer = Vec::new();
        // NOTE(review): the first argument is presumably the number of rows -- confirm.
        columnar_writer.serialize(2, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        {
            // The recorded name resolves to exactly one column, of type U64.
            let columns = columnar.read_columns("col").unwrap();
            assert_eq!(columns.len(), 1);
            assert_eq!(columns[0].column_type(), ColumnType::U64);
        }
        {
            // An unknown name yields an empty result rather than an error.
            let columns = columnar.read_columns("other").unwrap();
            assert_eq!(columns.len(), 0);
        }
    }
    // Checks that `read_subpath_columns` returns only the columns nested under the
    // given root path, and not the root column itself or unrelated columns.
    #[test]
    fn test_read_subpath_columns() {
        let mut columnar_writer = ColumnarWriter::default();
        // Two nested columns under "col1": a string one and a numerical one.
        columnar_writer.record_str(
            0,
            &format!("col1{}subcol1", JSON_PATH_SEGMENT_SEP as char),
            "hello",
        );
        columnar_writer.record_numerical(
            0,
            &format!("col1{}subcol2", JSON_PATH_SEGMENT_SEP as char),
            1i64,
        );
        // Plain columns named exactly "col1" and "col2", with no nested path.
        columnar_writer.record_str(1, "col1", "hello");
        columnar_writer.record_str(0, "col2", "hello");
        let mut buffer = Vec::new();
        columnar_writer.serialize(2, &mut buffer).unwrap();
        let columnar = ColumnarReader::open(buffer).unwrap();
        {
            // Only the two nested columns are returned; the plain "col1" column is excluded.
            let columns = columnar.read_subpath_columns("col1").unwrap();
            assert_eq!(columns.len(), 2);
            assert_eq!(columns[0].column_type(), ColumnType::Str);
            assert_eq!(columns[1].column_type(), ColumnType::I64);
        }
        {
            // A '.'-separated path is not the segment separator used above, so nothing matches.
            let columns = columnar.read_subpath_columns("col1.subcol1").unwrap();
            assert_eq!(columns.len(), 0);
        }
        {
            // "col2" exists but has no nested columns.
            let columns = columnar.read_subpath_columns("col2").unwrap();
            assert_eq!(columns.len(), 0);
        }
        {
            // A root path that was never recorded.
            let columns = columnar.read_subpath_columns("other").unwrap();
            assert_eq!(columns.len(), 0);
        }
    }
    #[test]
    #[should_panic(expected = "Input type forbidden")]
    fn test_list_columns_strict_typing_panics_on_wrong_types() {
--- a/columnar/src/columnar/writer/column_operation.rs
+++ b/columnar/src/columnar/writer/column_operation.rs
@@ -122,6 +122,7 @@ impl<T> From<T> for ColumnOperation<T> {
 // In order to limit memory usage, and in order
 // to benefit from the stacker, we do this by serializing our data
 // as "Symbols".
 #[allow(clippy::from_over_into)]
 pub(super) trait SymbolValue: Clone + Copy {
    // Serializes the symbol into the given buffer.
    // Returns the number of bytes written into the buffer.
@@ -244,7 +245,7 @@ impl SymbolValue for UnorderedId {
 fn compute_num_bytes_for_u64(val: u64) -> usize {
    let msb = (64u32 - val.leading_zeros()) as usize;
-    msb.div_ceil(8)
+    (msb + 7) / 8
 }
 fn encode_zig_zag(n: i64) -> u64 {
--- a/columnar/src/columnar/writer/column_writers.rs
+++ b/columnar/src/columnar/writer/column_writers.rs
@@ -42,7 +42,7 @@ impl ColumnWriter {
        &self,
        arena: &MemoryArena,
        buffer: &'a mut Vec<u8>,
-    ) -> impl Iterator<Item = ColumnOperation<V>> + 'a + use<'a, V> {
+    ) -> impl Iterator<Item = ColumnOperation<V>> + 'a {
        buffer.clear();
        self.values.read_to_end(arena, buffer);
        let mut cursor: &[u8] = &buffer[..];
@@ -104,10 +104,9 @@ pub(crate) struct NumericalColumnWriter {
 impl NumericalColumnWriter {
    pub fn force_numerical_type(&mut self, numerical_type: NumericalType) {
-        assert!(
+        assert!(self
-            self.compatible_numerical_types
+            .compatible_numerical_types
-                .is_type_accepted(numerical_type)
+            .is_type_accepted(numerical_type));
        );
        self.compatible_numerical_types = CompatibleNumericalTypes::StaticType(numerical_type);
    }
 }
@@ -212,7 +211,7 @@ impl NumericalColumnWriter {
        self,
        arena: &MemoryArena,
        buffer: &'a mut Vec<u8>,
-    ) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a + use<'a> {
+    ) -> impl Iterator<Item = ColumnOperation<NumericalValue>> + 'a {
        self.column_writer.operation_iterator(arena, buffer)
    }
 }
@@ -256,7 +255,7 @@ impl StrOrBytesColumnWriter {
        &self,
        arena: &MemoryArena,
        byte_buffer: &'a mut Vec<u8>,
-    ) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a + use<'a> {
+    ) -> impl Iterator<Item = ColumnOperation<UnorderedId>> + 'a {
        self.column_writer.operation_iterator(arena, byte_buffer)
    }
 }
--- a/columnar/src/columnar/writer/mod.rs
+++ b/columnar/src/columnar/writer/mod.rs
@@ -8,13 +8,13 @@ use std::net::Ipv6Addr;
 use column_operation::ColumnOperation;
 pub(crate) use column_writers::CompatibleNumericalTypes;
 use common::CountingWriter;
 use common::json_path_writer::JSON_END_OF_PATH;
 use common::CountingWriter;
 pub(crate) use serializer::ColumnarSerializer;
 use stacker::{Addr, ArenaHashMap, MemoryArena};
 use crate::column_index::{SerializableColumnIndex, SerializableOptionalIndex};
-use crate::column_values::{MonotonicallyMappableToU64, MonotonicallyMappableToU128};
+use crate::column_values::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
 use crate::columnar::column_type::ColumnType;
 use crate::columnar::writer::column_writers::{
    ColumnWriter, NumericalColumnWriter, StrOrBytesColumnWriter,
@@ -285,6 +285,7 @@ impl ColumnarWriter {
                .map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
        );
        columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));
        let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
        let mut symbol_byte_buffer: Vec<u8> = Vec::new();
        for (column_name, column_type, addr) in columns {
@@ -391,7 +392,7 @@ impl ColumnarWriter {
 // Serialize [Dictionary, Column, dictionary num bytes U32::LE]
 // Column: [Column Index, Column Values, column index num bytes U32::LE]
-#[expect(clippy::too_many_arguments)]
+#[allow(clippy::too_many_arguments)]
 fn serialize_bytes_or_str_column(
    cardinality: Cardinality,
    num_docs: RowId,
--- a/columnar/src/columnar/writer/serializer.rs
+++ b/columnar/src/columnar/writer/serializer.rs
@@ -3,11 +3,11 @@ use std::io::Write;
 use common::json_path_writer::JSON_END_OF_PATH;
 use common::{BinarySerializable, CountingWriter};
 use sstable::RangeSSTable;
 use sstable::value::RangeValueWriter;
 use sstable::RangeSSTable;
 use crate::RowId;
 use crate::columnar::ColumnType;
 use crate::RowId;
 pub struct ColumnarSerializer<W: io::Write> {
    wrt: CountingWriter<W>,
@@ -67,7 +67,7 @@ pub struct ColumnSerializer<'a, W: io::Write> {
    start_offset: u64,
 }
-impl<W: io::Write> ColumnSerializer<'_, W> {
+impl<'a, W: io::Write> ColumnSerializer<'a, W> {
    pub fn finalize(self) -> io::Result<()> {
        let end_offset: u64 = self.columnar_serializer.wrt.written_bytes();
        let byte_range = self.start_offset..end_offset;
@@ -80,7 +80,7 @@ impl<W: io::Write> ColumnSerializer<'_, W> {
    }
 }
-impl<W: io::Write> io::Write for ColumnSerializer<'_, W> {
+impl<'a, W: io::Write> io::Write for ColumnSerializer<'a, W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.columnar_serializer.wrt.write(buf)
    }
--- a/columnar/src/columnar/writer/value_index.rs
+++ b/columnar/src/columnar/writer/value_index.rs
@@ -1,6 +1,6 @@
 use crate::RowId;
 use crate::column_index::{SerializableMultivalueIndex, SerializableOptionalIndex};
 use crate::iterable::Iterable;
 use crate::RowId;
 /// The `IndexBuilder` interprets a sequence of
 /// calls of the form:
@@ -31,13 +31,12 @@ pub struct OptionalIndexBuilder {
 impl OptionalIndexBuilder {
    pub fn finish(&mut self, num_rows: RowId) -> impl Iterable<RowId> + '_ {
-        debug_assert!(
+        debug_assert!(self
-            self.docs
+            .docs
-                .last()
+            .last()
-                .copied()
+            .copied()
-                .map(|last_doc| last_doc < num_rows)
+            .map(|last_doc| last_doc < num_rows)
-                .unwrap_or(true)
+            .unwrap_or(true));
        );
        &self.docs[..]
    }
@@ -49,13 +48,12 @@ impl OptionalIndexBuilder {
 impl IndexBuilder for OptionalIndexBuilder {
    #[inline(always)]
    fn record_row(&mut self, doc: RowId) {
-        debug_assert!(
+        debug_assert!(self
-            self.docs
+            .docs
-                .last()
+            .last()
-                .copied()
+            .copied()
-                .map(|prev_doc| doc > prev_doc)
+            .map(|prev_doc| doc > prev_doc)
-                .unwrap_or(true)
+            .unwrap_or(true));
        );
        self.docs.push(doc);
    }
 }
--- a/columnar/src/compat_tests.rs
+++ b/columnar/src/compat_tests.rs
@@ -3,8 +3,8 @@ use std::path::PathBuf;
 use itertools::Itertools;
 use crate::{
-    CURRENT_VERSION, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
+    merge_columnar, Cardinality, Column, ColumnarReader, DynamicColumn, StackMergeOrder,
-    merge_columnar,
+    CURRENT_VERSION,
 };
 const NUM_DOCS: u32 = u16::MAX as u32;
--- a/columnar/src/dynamic_column.rs
+++ b/columnar/src/dynamic_column.rs
@@ -3,11 +3,10 @@ use std::sync::Arc;
 use std::{fmt, io};
 use common::file_slice::FileSlice;
-use common::{ByteCount, DateTime, OwnedBytes};
+use common::{ByteCount, DateTime, HasLen, OwnedBytes};
 use serde::{Deserialize, Serialize};
 use crate::column::{BytesColumn, Column, StrColumn};
-use crate::column_values::{StrictlyMonotonicFn, monotonic_map_column};
+use crate::column_values::{monotonic_map_column, StrictlyMonotonicFn};
 use crate::columnar::ColumnType;
 use crate::{Cardinality, ColumnIndex, ColumnValues, NumericalType, Version};
@@ -318,89 +317,10 @@ impl DynamicColumnHandle {
    }
    pub fn num_bytes(&self) -> ByteCount {
-        self.file_slice.num_bytes()
+        self.file_slice.len().into()
    }
    /// Legacy helper returning the column space usage.
    ///
    /// This simply forwards to [`Self::space_usage`] and returns its result unchanged.
    pub fn column_and_dictionary_num_bytes(&self) -> io::Result<ColumnSpaceUsage> {
        self.space_usage()
    }
    /// Return the space usage of the column, optionally broken down by dictionary and column
    /// values.
    ///
    /// For dictionary encoded columns (strings and bytes), this splits the total footprint into
    /// the dictionary and the remaining column data (including index and values).
    /// For all other column types, the dictionary size is `None` and the column size
    /// equals the total bytes.
    ///
    /// # Errors
    /// Returns the I/O error raised by `open()` if the column cannot be opened.
    ///
    /// # Panics
    /// Panics if the dictionary reports more bytes than the whole column.
    pub fn space_usage(&self) -> io::Result<ColumnSpaceUsage> {
        let total_num_bytes = self.num_bytes();
        // The column has to be opened to reach its dictionary.
        let dynamic_column = self.open()?;
        let dictionary_num_bytes = match &dynamic_column {
            DynamicColumn::Bytes(bytes_column) => bytes_column.dictionary().num_bytes(),
            DynamicColumn::Str(str_column) => str_column.dictionary().num_bytes(),
            _ => {
                // Not dictionary encoded: everything is accounted as column bytes.
                return Ok(ColumnSpaceUsage::new(self.num_bytes(), None));
            }
        };
        // Guards the subtraction below against underflow.
        assert!(dictionary_num_bytes <= total_num_bytes);
        let column_num_bytes =
            ByteCount::from(total_num_bytes.get_bytes() - dictionary_num_bytes.get_bytes());
        Ok(ColumnSpaceUsage::new(
            column_num_bytes,
            Some(dictionary_num_bytes),
        ))
    }
    pub fn column_type(&self) -> ColumnType {
        self.column_type
    }
 }
 /// Represents space usage of a column.
 ///
 /// `column_num_bytes` tracks the column payload (index, values and footer).
 /// For dictionary encoded columns, `dictionary_num_bytes` captures the dictionary footprint.
 /// [`ColumnSpaceUsage::total_num_bytes`] returns the sum of both parts.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct ColumnSpaceUsage {
    // Bytes used by the column itself, excluding any dictionary.
    column_num_bytes: ByteCount,
    // Bytes used by the dictionary; `None` when no dictionary size was recorded.
    dictionary_num_bytes: Option<ByteCount>,
 }
 impl ColumnSpaceUsage {
    /// Creates a space usage record from its two components.
    pub(crate) fn new(
        column_num_bytes: ByteCount,
        dictionary_num_bytes: Option<ByteCount>,
    ) -> Self {
        ColumnSpaceUsage {
            column_num_bytes,
            dictionary_num_bytes,
        }
    }
    /// Returns the number of bytes attributed to the column, dictionary excluded.
    pub fn column_num_bytes(&self) -> ByteCount {
        self.column_num_bytes
    }
    /// Returns the number of bytes attributed to the dictionary, if one was recorded.
    pub fn dictionary_num_bytes(&self) -> Option<ByteCount> {
        self.dictionary_num_bytes
    }
    /// Returns the column bytes plus the dictionary bytes.
    ///
    /// A missing dictionary contributes the default `ByteCount`.
    pub fn total_num_bytes(&self) -> ByteCount {
        self.column_num_bytes + self.dictionary_num_bytes.unwrap_or_default()
    }
    /// Merge two space usage values by summing their components.
    ///
    /// The merged dictionary size is `None` only when both inputs have no dictionary size;
    /// if exactly one side has one, that value is kept as is.
    pub fn merge(&self, other: &ColumnSpaceUsage) -> ColumnSpaceUsage {
        let dictionary_num_bytes = match (self.dictionary_num_bytes, other.dictionary_num_bytes) {
            (Some(lhs), Some(rhs)) => Some(lhs + rhs),
            (Some(val), None) | (None, Some(val)) => Some(val),
            (None, None) => None,
        };
        ColumnSpaceUsage {
            column_num_bytes: self.column_num_bytes + other.column_num_bytes,
            dictionary_num_bytes,
        }
    }
 }
--- a/columnar/src/iterable.rs
+++ b/columnar/src/iterable.rs
@@ -7,7 +7,7 @@ pub trait Iterable<T = u64> {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_>;
 }
-impl<T: Copy> Iterable<T> for &[T] {
+impl<'a, T: Copy> Iterable<T> for &'a [T] {
    fn boxed_iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
        Box::new(self.iter().copied())
    }
--- a/columnar/src/lib.rs
+++ b/columnar/src/lib.rs
@@ -17,10 +17,15 @@
 //!       column.
 //!     - [column_values]: Stores the values of a column in a dense format.
 #![cfg_attr(all(feature = "unstable", test), feature(test))]
 #[cfg(test)]
 #[macro_use]
 extern crate more_asserts;
 #[cfg(all(test, feature = "unstable"))]
 extern crate test;
 use std::fmt::Display;
 use std::io;
@@ -39,16 +44,16 @@ pub use block_accessor::ColumnBlockAccessor;
 pub use column::{BytesColumn, Column, StrColumn};
 pub use column_index::ColumnIndex;
 pub use column_values::{
-    ColumnValues, EmptyColumnValues, MonotonicallyMappableToU64, MonotonicallyMappableToU128,
+    ColumnValues, EmptyColumnValues, MonotonicallyMappableToU128, MonotonicallyMappableToU64,
 };
 pub use columnar::{
-    CURRENT_VERSION, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
+    merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
-    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, merge_columnar,
+    MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
 };
 use sstable::VoidSSTable;
 pub use value::{NumericalType, NumericalValue};
-pub use self::dynamic_column::{ColumnSpaceUsage, DynamicColumn, DynamicColumnHandle};
+pub use self::dynamic_column::{DynamicColumn, DynamicColumnHandle};
 pub type RowId = u32;
 pub type DocId = u32;
--- a/columnar/src/tests.rs
+++ b/columnar/src/tests.rs
@@ -60,7 +60,7 @@ fn test_dataframe_writer_bool() {
    let DynamicColumn::Bool(bool_col) = dyn_bool_col else {
        panic!();
    };
-    let vals: Vec<Option<bool>> = (0..5).map(|doc_id| bool_col.first(doc_id)).collect();
+    let vals: Vec<Option<bool>> = (0..5).map(|row_id| bool_col.first(row_id)).collect();
    assert_eq!(&vals, &[None, Some(false), None, Some(true), None,]);
 }
@@ -108,7 +108,7 @@ fn test_dataframe_writer_ip_addr() {
    let DynamicColumn::IpAddr(ip_col) = dyn_bool_col else {
        panic!();
    };
-    let vals: Vec<Option<Ipv6Addr>> = (0..5).map(|doc_id| ip_col.first(doc_id)).collect();
+    let vals: Vec<Option<Ipv6Addr>> = (0..5).map(|row_id| ip_col.first(row_id)).collect();
    assert_eq!(
        &vals,
        &[
@@ -169,7 +169,7 @@ fn test_dictionary_encoded_str() {
    let DynamicColumn::Str(str_col) = col_handles[0].open().unwrap() else {
        panic!();
    };
-    let index: Vec<Option<u64>> = (0..5).map(|doc_id| str_col.ords().first(doc_id)).collect();
+    let index: Vec<Option<u64>> = (0..5).map(|row_id| str_col.ords().first(row_id)).collect();
    assert_eq!(index, &[None, Some(0), None, Some(2), Some(1)]);
    assert_eq!(str_col.num_rows(), 5);
    let mut term_buffer = String::new();
@@ -204,7 +204,7 @@ fn test_dictionary_encoded_bytes() {
        panic!();
    };
    let index: Vec<Option<u64>> = (0..5)
-        .map(|doc_id| bytes_col.ords().first(doc_id))
+        .map(|row_id| bytes_col.ords().first(row_id))
        .collect();
    assert_eq!(index, &[None, Some(0), None, Some(2), Some(1)]);
    assert_eq!(bytes_col.num_rows(), 5);
@@ -380,7 +380,7 @@ fn assert_columnar_eq(
    right: &ColumnarReader,
    lenient_on_numerical_value: bool,
 ) {
-    assert_eq!(left.num_docs(), right.num_docs());
+    assert_eq!(left.num_rows(), right.num_rows());
    let left_columns = left.list_columns().unwrap();
    let right_columns = right.list_columns().unwrap();
    assert_eq!(left_columns.len(), right_columns.len());
@@ -588,7 +588,7 @@ proptest! {
    #[test]
    fn test_single_columnar_builder_proptest(docs in columnar_docs_strategy()) {
        let columnar = build_columnar(&docs[..]);
-        assert_eq!(columnar.num_docs() as usize, docs.len());
+        assert_eq!(columnar.num_rows() as usize, docs.len());
        let mut expected_columns: HashMap<(&str, ColumnTypeCategory), HashMap<u32, Vec<&ColumnValue>> > = Default::default();
        for (doc_id, doc_vals) in docs.iter().enumerate() {
            for (col_name, col_val) in doc_vals {
@@ -715,9 +715,8 @@ fn test_columnar_merging_number_columns() {
 // TODO test required_columns
 // TODO document edge case: required_columns incompatible with values.
-#[allow(clippy::type_complexity)]
+fn columnar_docs_and_remap(
-fn columnar_docs_and_remap()
+) -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
 -> impl Strategy<Value = (Vec<Vec<Vec<(&'static str, ColumnValue)>>>, Vec<RowAddr>)> {
    proptest::collection::vec(columnar_docs_strategy(), 2..=3).prop_flat_map(
        |columnars_docs: Vec<Vec<Vec<(&str, ColumnValue)>>>| {
            let row_addrs: Vec<RowAddr> = columnars_docs
@@ -820,7 +819,7 @@ fn test_columnar_merge_empty() {
    )
    .unwrap();
    let merged_columnar = ColumnarReader::open(output).unwrap();
-    assert_eq!(merged_columnar.num_docs(), 0);
+    assert_eq!(merged_columnar.num_rows(), 0);
    assert_eq!(merged_columnar.num_columns(), 0);
 }
@@ -846,7 +845,7 @@ fn test_columnar_merge_single_str_column() {
    )
    .unwrap();
    let merged_columnar = ColumnarReader::open(output).unwrap();
-    assert_eq!(merged_columnar.num_docs(), 1);
+    assert_eq!(merged_columnar.num_rows(), 1);
    assert_eq!(merged_columnar.num_columns(), 1);
 }
@@ -878,7 +877,7 @@ fn test_delete_decrease_cardinality() {
    )
    .unwrap();
    let merged_columnar = ColumnarReader::open(output).unwrap();
-    assert_eq!(merged_columnar.num_docs(), 1);
+    assert_eq!(merged_columnar.num_rows(), 1);
    assert_eq!(merged_columnar.num_columns(), 1);
    let cols = merged_columnar.read_columns("c").unwrap();
    assert_eq!(cols.len(), 1);
--- a/columnar/src/value.rs
+++ b/columnar/src/value.rs
@@ -1,5 +1,3 @@
 use std::str::FromStr;
 use common::DateTime;
 use crate::InvalidData;
@@ -11,23 +9,6 @@ pub enum NumericalValue {
    F64(f64),
 }
 /// Parses a numerical value from its textual form.
 ///
 /// The candidate types are tried in order: `i64`, then `u64`, then `f64`.
 /// A value parsed as `f64` is passed through `normalize()` before being returned.
 /// If none of the three parses succeeds, `Err(())` is returned.
 impl FromStr for NumericalValue {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, ()> {
        // Prefer i64: it is tried first, so any input that fits is returned as i64.
        if let Ok(val_i64) = s.parse::<i64>() {
            return Ok(val_i64.into());
        }
        // Reached only when the i64 parse failed.
        if let Ok(val_u64) = s.parse::<u64>() {
            return Ok(val_u64.into());
        }
        // Last resort: parse as a float, then normalize the result.
        if let Ok(val_f64) = s.parse::<f64>() {
            return Ok(NumericalValue::from(val_f64).normalize());
        }
        Err(())
    }
 }
 impl NumericalValue {
    pub fn numerical_type(&self) -> NumericalType {
        match self {
@@ -45,7 +26,7 @@ impl NumericalValue {
                if val <= i64::MAX as u64 {
                    NumericalValue::I64(val as i64)
                } else {
-                    NumericalValue::U64(val)
+                    NumericalValue::F64(val as f64)
                }
            }
            NumericalValue::I64(val) => NumericalValue::I64(val),
@@ -160,7 +141,6 @@ impl Coerce for DateTime {
 #[cfg(test)]
 mod tests {
    use super::NumericalType;
    use crate::NumericalValue;
    #[test]
    fn test_numerical_type_code() {
@@ -173,58 +153,4 @@ mod tests {
        }
        assert_eq!(num_numerical_type, 3);
    }
    // Checks which `NumericalValue` variant string parsing produces for representative inputs.
    #[test]
    fn test_parse_numerical() {
        // A small integer parses as I64.
        assert_eq!(
            "123".parse::<NumericalValue>().unwrap(),
            NumericalValue::I64(123)
        );
        // `u64::MAX` does not fit in an i64 and is expected as U64.
        assert_eq!(
            "18446744073709551615".parse::<NumericalValue>().unwrap(),
            NumericalValue::U64(18446744073709551615u64)
        );
        // A float literal with an integral value is expected as I64.
        assert_eq!(
            "1.0".parse::<NumericalValue>().unwrap(),
            NumericalValue::I64(1i64)
        );
        // A float with a fractional part stays F64.
        assert_eq!(
            "1.1".parse::<NumericalValue>().unwrap(),
            NumericalValue::F64(1.1f64)
        );
        // Same as above for a negative integral float.
        assert_eq!(
            "-1.0".parse::<NumericalValue>().unwrap(),
            NumericalValue::I64(-1i64)
        );
    }
    // Checks the variant `normalize()` is expected to return for each kind of input.
    #[test]
    fn test_normalize_numerical() {
        // A u64 that fits in an i64 is expected as I64.
        assert_eq!(
            NumericalValue::from(1u64).normalize(),
            NumericalValue::I64(1i64),
        );
        // First u64 value that no longer fits in an i64: expected to stay U64.
        let limit_val = i64::MAX as u64 + 1u64;
        assert_eq!(
            NumericalValue::from(limit_val).normalize(),
            NumericalValue::U64(limit_val),
        );
        // An i64 is left unchanged.
        assert_eq!(
            NumericalValue::from(-1i64).normalize(),
            NumericalValue::I64(-1i64),
        );
        // A float with an integral value is expected as I64.
        assert_eq!(
            NumericalValue::from(-2.0f64).normalize(),
            NumericalValue::I64(-2i64),
        );
        // A float with a fractional part stays F64.
        assert_eq!(
            NumericalValue::from(-2.1f64).normalize(),
            NumericalValue::F64(-2.1f64),
        );
        // 2^70 is integral but exceeds the 64-bit integer range, so it is expected to stay F64.
        let large_float = 2.0f64.powf(70.0f64);
        assert_eq!(
            NumericalValue::from(large_float).normalize(),
            NumericalValue::F64(large_float),
        );
    }
 }
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
 name = "tantivy-common"
-version = "0.10.0"
+version = "0.7.0"
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
-edition = "2024"
+edition = "2021"
 description = "common traits and utility functions used by multiple tantivy subcrates"
 documentation = "https://docs.rs/tantivy_common/"
 homepage = "https://github.com/quickwit-oss/tantivy"
@@ -13,13 +13,13 @@ repository = "https://github.com/quickwit-oss/tantivy"
 [dependencies]
 byteorder = "1.4.3"
-ownedbytes = { version= "0.9", path="../ownedbytes" }
+ownedbytes = { version= "0.7", path="../ownedbytes" }
 async-trait = "0.1"
 time = { version = "0.3.10", features = ["serde-well-known"] }
 serde = { version = "1.0.136", features = ["derive"] }
 [dev-dependencies]
-binggan = "0.14.0"
+binggan = "0.12.0"
 proptest = "1.0.0"
 rand = "0.8.4"
--- a/common/benches/bench.rs
+++ b/common/benches/bench.rs
@@ -1,7 +1,7 @@
-use binggan::{BenchRunner, black_box};
+use binggan::{black_box, BenchRunner};
 use rand::seq::IteratorRandom;
 use rand::thread_rng;
-use tantivy_common::{BitSet, TinySet, serialize_vint_u32};
+use tantivy_common::{serialize_vint_u32, BitSet, TinySet};
 fn bench_vint() {
    let mut runner = BenchRunner::new();
@@ -15,6 +15,7 @@ fn bench_vint() {
            out += u64::from(buf[0]);
        }
        black_box(out);
        None
    });
    let vals: Vec<u32> = (0..20_000).choose_multiple(&mut thread_rng(), 100_000);
@@ -26,6 +27,7 @@ fn bench_vint() {
            out += u64::from(buf[0]);
        }
        black_box(out);
        None
    });
 }
@@ -41,20 +43,24 @@ fn bench_bitset() {
        tinyset.pop_lowest();
        tinyset.pop_lowest();
        black_box(tinyset);
        None
    });
    let tiny_set = TinySet::empty().insert(10u32).insert(14u32).insert(21u32);
    runner.bench_function("bench_tinyset_sum", move |_| {
        assert_eq!(black_box(tiny_set).into_iter().sum::<u32>(), 45u32);
        None
    });
    let v = [10u32, 14u32, 21u32];
    runner.bench_function("bench_tinyarr_sum", move |_| {
        black_box(v.iter().cloned().sum::<u32>());
        None
    });
    runner.bench_function("bench_bitset_initialize", move |_| {
        black_box(BitSet::with_max_value(1_000_000));
        None
    });
 }
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -181,17 +181,9 @@ pub struct BitSet {
    len: u64,
    max_value: u32,
 }
 /// Debug output for `BitSet` that prints only the `len` and `max_value` fields.
 impl std::fmt::Debug for BitSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("BitSet")
            .field("len", &self.len)
            .field("max_value", &self.max_value)
            .finish()
    }
 }
 fn num_buckets(max_val: u32) -> u32 {
-    max_val.div_ceil(64u32)
+    (max_val + 63u32) / 64u32
 }
 impl BitSet {
--- a/common/src/bounds.rs
+++ b/common/src/bounds.rs
@@ -65,11 +65,11 @@ pub fn transform_bound_inner_res<TFrom, TTo>(
 ) -> io::Result<Bound<TTo>> {
    use self::Bound::*;
    Ok(match bound {
-        Excluded(from_val) => match transform(from_val)? {
+        Excluded(ref from_val) => match transform(from_val)? {
            TransformBound::NewBound(new_val) => new_val,
            TransformBound::Existing(new_val) => Excluded(new_val),
        },
-        Included(from_val) => match transform(from_val)? {
+        Included(ref from_val) => match transform(from_val)? {
            TransformBound::NewBound(new_val) => new_val,
            TransformBound::Existing(new_val) => Included(new_val),
        },
@@ -85,11 +85,11 @@ pub fn transform_bound_inner<TFrom, TTo>(
 ) -> Bound<TTo> {
    use self::Bound::*;
    match bound {
-        Excluded(from_val) => match transform(from_val) {
+        Excluded(ref from_val) => match transform(from_val) {
            TransformBound::NewBound(new_val) => new_val,
            TransformBound::Existing(new_val) => Excluded(new_val),
        },
-        Included(from_val) => match transform(from_val) {
+        Included(ref from_val) => match transform(from_val) {
            TransformBound::NewBound(new_val) => new_val,
            TransformBound::Existing(new_val) => Included(new_val),
        },
@@ -111,8 +111,8 @@ pub fn map_bound<TFrom, TTo>(
 ) -> Bound<TTo> {
    use self::Bound::*;
    match bound {
-        Excluded(from_val) => Bound::Excluded(transform(from_val)),
+        Excluded(ref from_val) => Bound::Excluded(transform(from_val)),
-        Included(from_val) => Bound::Included(transform(from_val)),
+        Included(ref from_val) => Bound::Included(transform(from_val)),
        Unbounded => Unbounded,
    }
 }
@@ -123,8 +123,8 @@ pub fn map_bound_res<TFrom, TTo, Err>(
 ) -> Result<Bound<TTo>, Err> {
    use self::Bound::*;
    Ok(match bound {
-        Excluded(from_val) => Excluded(transform(from_val)?),
+        Excluded(ref from_val) => Excluded(transform(from_val)?),
-        Included(from_val) => Included(transform(from_val)?),
+        Included(ref from_val) => Included(transform(from_val)?),
        Unbounded => Unbounded,
    })
 }
--- a/common/src/file_slice.rs
+++ b/common/src/file_slice.rs
@@ -1,6 +1,5 @@
 use std::fs::File;
 use std::ops::{Deref, Range, RangeBounds};
 use std::path::Path;
 use std::sync::Arc;
 use std::{fmt, io};
@@ -74,7 +73,7 @@ impl FileHandle for WrapFile {
        {
            use std::io::{Read, Seek};
            let mut file = self.file.try_clone()?; // Clone the file to read from it separately
-            // Seek to the start position in the file
+                                                   // Seek to the start position in the file
            file.seek(io::SeekFrom::Start(start as u64))?;
            // Read the data into the buffer
            file.read_exact(&mut buffer)?;
@@ -178,12 +177,6 @@ fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R)
 }
 impl FileSlice {
    /// Creates a FileSlice from a path.
    ///
    /// The file is opened with [`File::open`] and wrapped in a `WrapFile` handle.
    ///
    /// # Errors
    /// Returns the I/O error if opening the file or building the `WrapFile` fails.
    pub fn open(path: &Path) -> io::Result<FileSlice> {
        let wrap_file = WrapFile::new(File::open(path)?)?;
        Ok(FileSlice::new(Arc::new(wrap_file)))
    }
    /// Wraps a FileHandle.
    pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
        let num_bytes = file_handle.len();
@@ -346,8 +339,8 @@ mod tests {
    use std::sync::Arc;
    use super::{FileHandle, FileSlice};
    use crate::HasLen;
    use crate::file_slice::combine_ranges;
    use crate::HasLen;
    #[test]
    fn test_file_slice() -> io::Result<()> {
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -22,7 +22,7 @@ pub use json_path_writer::JsonPathWriter;
 pub use ownedbytes::{OwnedBytes, StableDeref};
 pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
 pub use vint::{
-    VInt, VIntU128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint,
+    read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, VIntU128,
 };
 pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};
@@ -130,11 +130,11 @@ pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
 }
 #[cfg(test)]
-pub(crate) mod test {
+pub mod test {
    use proptest::prelude::*;
-    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
+    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize};
    fn test_i64_converter_helper(val: i64) {
        assert_eq!(u64_to_i64(i64_to_u64(val)), val);
@@ -144,6 +144,12 @@ pub(crate) mod test {
        assert_eq!(u64_to_f64(f64_to_u64(val)), val);
    }
    /// Test helper: serializes `O::default()` and asserts that the number of bytes written
    /// equals `O::SIZE_IN_BYTES`.
    ///
    /// # Panics
    /// Panics if serialization fails or if the sizes differ.
    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
        let mut buffer = Vec::new();
        O::default().serialize(&mut buffer).unwrap();
        assert_eq!(buffer.len(), O::SIZE_IN_BYTES);
    }
    proptest! {
        #[test]
        fn test_f64_converter_monotonicity_proptest((left, right) in (proptest::num::f64::NORMAL, proptest::num::f64::NORMAL)) {
@@ -177,10 +183,8 @@ pub(crate) mod test {
    #[test]
    fn test_f64_order() {
-        assert!(
+        assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
-            !(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
+            .contains(&f64_to_u64(f64::NAN))); // nan is not a number
                .contains(&f64_to_u64(f64::NAN))
        ); // nan is not a number
        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
--- a/common/src/serialize.rs
+++ b/common/src/serialize.rs
@@ -74,14 +74,14 @@ impl FixedSize for () {
 impl<T: BinarySerializable> BinarySerializable for Vec<T> {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        BinarySerializable::serialize(&VInt(self.len() as u64), writer)?;
+        VInt(self.len() as u64).serialize(writer)?;
        for it in self {
            it.serialize(writer)?;
        }
        Ok(())
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
-        let num_items = <VInt as BinarySerializable>::deserialize(reader)?.val();
+        let num_items = VInt::deserialize(reader)?.val();
        let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
        for _ in 0..num_items {
            let item = T::deserialize(reader)?;
@@ -236,12 +236,12 @@ impl FixedSize for bool {
 impl BinarySerializable for String {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
-        BinarySerializable::serialize(&VInt(data.len() as u64), writer)?;
+        VInt(data.len() as u64).serialize(writer)?;
        writer.write_all(data)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
-        let string_length = <VInt as BinarySerializable>::deserialize(reader)?.val() as usize;
+        let string_length = VInt::deserialize(reader)?.val() as usize;
        let mut result = String::with_capacity(string_length);
        reader
            .take(string_length as u64)
@@ -253,12 +253,12 @@ impl BinarySerializable for String {
 impl<'a> BinarySerializable for Cow<'a, str> {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
        let data: &[u8] = self.as_bytes();
-        BinarySerializable::serialize(&VInt(data.len() as u64), writer)?;
+        VInt(data.len() as u64).serialize(writer)?;
        writer.write_all(data)
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, str>> {
-        let string_length = <VInt as BinarySerializable>::deserialize(reader)?.val() as usize;
+        let string_length = VInt::deserialize(reader)?.val() as usize;
        let mut result = String::with_capacity(string_length);
        reader
            .take(string_length as u64)
@@ -269,18 +269,18 @@ impl<'a> BinarySerializable for Cow<'a, str> {
 impl<'a> BinarySerializable for Cow<'a, [u8]> {
    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        BinarySerializable::serialize(&VInt(self.len() as u64), writer)?;
+        VInt(self.len() as u64).serialize(writer)?;
        for it in self.iter() {
-            BinarySerializable::serialize(it, writer)?;
+            it.serialize(writer)?;
        }
        Ok(())
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Cow<'a, [u8]>> {
-        let num_items = <VInt as BinarySerializable>::deserialize(reader)?.val();
+        let num_items = VInt::deserialize(reader)?.val();
        let mut items: Vec<u8> = Vec::with_capacity(num_items as usize);
        for _ in 0..num_items {
-            let item = <u8 as BinarySerializable>::deserialize(reader)?;
+            let item = u8::deserialize(reader)?;
            items.push(item);
        }
        Ok(Cow::Owned(items))
--- a/common/src/vint.rs
+++ b/common/src/vint.rs
@@ -28,9 +28,7 @@ impl BinarySerializable for VIntU128 {
        writer.write_all(&buffer)
    }
    #[allow(clippy::unbuffered_bytes)]
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        #[allow(clippy::unbuffered_bytes)]
        let mut bytes = reader.bytes();
        let mut result = 0u128;
        let mut shift = 0u64;
@@ -197,9 +195,7 @@ impl BinarySerializable for VInt {
        writer.write_all(&buffer[0..num_bytes])
    }
    #[allow(clippy::unbuffered_bytes)]
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        #[allow(clippy::unbuffered_bytes)]
        let mut bytes = reader.bytes();
        let mut result = 0u64;
        let mut shift = 0u64;
@@ -226,7 +222,7 @@ impl BinarySerializable for VInt {
 #[cfg(test)]
 mod tests {
-    use super::{BinarySerializable, VInt, serialize_vint_u32};
+    use super::{serialize_vint_u32, BinarySerializable, VInt};
    fn aux_test_vint(val: u64) {
        let mut v = [14u8; 10];
--- a/common/src/writer.rs
+++ b/common/src/writer.rs
@@ -87,7 +87,7 @@ impl<W: TerminatingWrite> TerminatingWrite for BufWriter<W> {
    }
 }
-impl TerminatingWrite for &mut Vec<u8> {
+impl<'a> TerminatingWrite for &'a mut Vec<u8> {
    /// Terminates the in-memory `Vec<u8>` writer by flushing it.
    ///
    /// The `AntiCallToken` argument is not used by this implementation.
    fn terminate_ref(&mut self, _a: AntiCallToken) -> io::Result<()> {
        self.flush()
    }
--- a/doc/assets/images/paradedb.png
+++ b/doc/assets/images/paradedb.png
--- a/doc/assets/images/searchbenchmark.png
+++ b/doc/assets/images/searchbenchmark.png
--- a/doc/src/avant-propos.md
+++ b/doc/src/avant-propos.md
@@ -2,7 +2,7 @@
 > Tantivy is a **search** engine **library** for Rust.
-If you are familiar with Lucene, it's an excellent approximation to consider tantivy as Lucene for Rust. Tantivy is heavily inspired by Lucene's design and
+If you are familiar with Lucene, it's an excellent approximation to consider tantivy as Lucene for rust. tantivy is heavily inspired by Lucene's design and
 they both have the same scope and targeted use cases.
 If you are not familiar with Lucene, let's break down our little tagline.
@@ -17,7 +17,7 @@ relevancy, collapsing, highlighting, spatial search.
  experience. But keep in mind this is just a toolbox.
  Which brings us to the second keyword...
- **Library** means that you will have to write code. Tantivy is not an *all-in-one* server solution like Elasticsearch for instance.
+- **Library** means that you will have to write code. tantivy is not an *all-in-one* server solution like elastic search for instance.
  Sometimes a functionality will not be available in tantivy because it is too
  specific to your use case. By design, tantivy should make it possible to extend
@@ -31,4 +31,4 @@ relevancy, collapsing, highlighting, spatial search.
  index from a different format.
  Tantivy exposes a lot of low level API to do all of these things.
-  
+  
--- a/doc/src/basis.md
+++ b/doc/src/basis.md
@@ -11,7 +11,7 @@ directory shipped with tantivy is the `MmapDirectory`.
 While this design has some downsides, this greatly simplifies the source code of
 tantivy. Caching is also entirely delegated to the OS.
-Tantivy works entirely (or almost) by directly reading the datastructures as they are laid on disk. As a result, the act of opening an indexing does not involve loading different datastructures from the disk into random access memory : starting a process, opening an index, and performing your first query can typically be done in a matter of milliseconds.
+`tantivy` works entirely (or almost) by directly reading the datastructures as they are laid on disk. As a result, the act of opening an indexing does not involve loading different datastructures from the disk into random access memory : starting a process, opening an index, and performing your first query can typically be done in a matter of milliseconds.
 This is an interesting property for a command line search engine, or for some multi-tenant log search engine : spawning a new process for each new query can be a perfectly sensible solution in some use case.
--- a/doc/src/index_sorting.md
+++ b/doc/src/index_sorting.md
@@ -31,13 +31,13 @@ Compression ratio is mainly affected on the fast field of the sorted property, e
 When data is presorted by a field and search queries request sorting by the same field, we can leverage the natural order of the documents.
 E.g. if the data is sorted by timestamp and we want the top n newest docs containing a term, we can simply leverage the order of the docids.
-Note: tantivy 0.16 does not do this optimization yet.
+Note: Tantivy 0.16 does not do this optimization yet.
 ### Pruning
 Let's say we want all documents and want to apply the filter `>= 2010-08-11`. When the data is sorted, we could make a lookup in the fast field to find the docid range and use this as the filter.
-Note: tantivy 0.16 does not do this optimization yet.
+Note: Tantivy 0.16 does not do this optimization yet.
 ### Other?
@@ -45,7 +45,7 @@ In principle there are many algorithms possible that exploit the monotonically i
 ## Usage
-The index sorting can be configured setting [`sort_by_field`](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/core/index_meta.rs#L238) on `IndexSettings` and passing it to a `IndexBuilder`. As of tantivy 0.16 only fast fields are allowed to be used.
+The index sorting can be configured setting [`sort_by_field`](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/core/index_meta.rs#L238) on `IndexSettings` and passing it to a `IndexBuilder`. As of Tantivy 0.16 only fast fields are allowed to be used.
 ```rust
 let settings = IndexSettings {
--- a/doc/src/json.md
+++ b/doc/src/json.md
@@ -39,7 +39,7 @@ Its representation is done by separating segments by a unicode char `\x01`, and
 - `value`: The value representation is just the regular Value representation.
 This representation is designed to align the natural sort of Terms with the lexicographical sort
-of their binary representation (tantivy's dictionary (whether fst or sstable) is sorted and does prefix encoding).
+of their binary representation (Tantivy's dictionary (whether fst or sstable) is sorted and does prefix encoding).
 In the example above, the terms will be sorted as
--- a/examples/basic_search.rs
+++ b/examples/basic_search.rs
@@ -51,7 +51,7 @@ fn main() -> tantivy::Result<()> {
    // Our second field is body.
    // We want full-text search for it, but we do not
-    // need to be able to retrieve it
+    // need to be able to be able to retrieve it
    // for our application.
    //
    // We can make our index lighter by omitting the `STORED` flag.
@@ -208,7 +208,7 @@ fn main() -> tantivy::Result<()> {
    // is the role of the `TopDocs` collector.
    // We can now perform our query.
-    let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
+    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    // The actual documents still need to be
    // retrieved from Tantivy's store.
@@ -226,7 +226,7 @@ fn main() -> tantivy::Result<()> {
    let query = query_parser.parse_query("title:sea^20 body:whale^70")?;
    let (_score, doc_address) = searcher
-        .search(&query, &TopDocs::with_limit(1).order_by_score())?
+        .search(&query, &TopDocs::with_limit(1))?
        .into_iter()
        .next()
        .unwrap();
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -100,7 +100,7 @@ fn main() -> tantivy::Result<()> {
    // here we want to get a hit on the 'ken' in Frankenstein
    let query = query_parser.parse_query("ken")?;
-    let top_docs = searcher.search(&query, &TopDocs::with_limit(10).order_by_score())?;
+    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    for (_, doc_address) in top_docs {
        let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Pascal Seitz	80538175e8	fix build	2024-10-16 10:33:24 +08:00
dependabot[bot]	8dc942e8e7	Update binggan requirement from 0.10.0 to 0.12.0 --- updated-dependencies: - dependency-name: binggan dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2024-10-15 20:05:11 +00:00