From 9e2ddec4b31a57b32e4033b63a0f1827a000a777 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Sat, 13 Jul 2024 20:28:12 +0200 Subject: [PATCH] merge adjacent block when building delta for automaton --- sstable/Cargo.toml | 1 + sstable/src/dictionary.rs | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/sstable/Cargo.toml b/sstable/Cargo.toml index cf840ec80..7a2702486 100644 --- a/sstable/Cargo.toml +++ b/sstable/Cargo.toml @@ -12,6 +12,7 @@ description = "sstables for tantivy" [dependencies] common = {version= "0.7", path="../common", package="tantivy-common"} futures-util = "0.3.30" +itertools = "0.13.0" tantivy-bitpacker = { version= "0.6", path="../bitpacker" } tantivy-fst = "0.5" # experimental gives us access to Decompressor::upper_bound diff --git a/sstable/src/dictionary.rs b/sstable/src/dictionary.rs index e57748aa1..b98513788 100644 --- a/sstable/src/dictionary.rs +++ b/sstable/src/dictionary.rs @@ -8,6 +8,7 @@ use common::bounds::{transform_bound_inner_res, TransformBound}; use common::file_slice::FileSlice; use common::{BinarySerializable, OwnedBytes}; use futures_util::{stream, StreamExt, TryStreamExt}; +use itertools::Itertools; use tantivy_fst::automaton::AlwaysMatch; use tantivy_fst::Automaton; @@ -254,6 +255,16 @@ impl Dictionary { .get_block_for_automaton(automaton) .filter(move |(block_id, _)| block_range.contains(block_id)) .map(|(_, block_addr)| block_addr) + .coalesce(|first, second| { + if first.byte_range.end == second.byte_range.start { + Ok(BlockAddr { + first_ordinal: first.first_ordinal, + byte_range: first.byte_range.start..second.byte_range.end, + }) + } else { + Err((first, second)) + } + }) } /// Opens a `TermDictionary`.