From 790baa7adf0775dafa20e7f155d44c1a36b2d093 Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Fri, 16 Aug 2019 10:29:28 +0900
Subject: [PATCH] Integrated state into TermDict streamer

---
 Cargo.toml                    |  2 +-
 src/query/automaton_weight.rs |  3 +++
 src/termdict/streamer.rs      | 19 +++++++++++++++----
 src/termdict/termdict.rs      |  6 +++++-
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index cf01250a9..a4030392a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@ base64 = "0.10.0"
 byteorder = "1.0"
 once_cell = "0.2"
 regex = "1.0"
-tantivy-fst = "0.1"
+tantivy-fst = {git="https://github.com/tantivy-search/fst"}
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1.20", optional=true}
 snap = {version="0.2"}
diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs
index eefb00390..6d106fd9c 100644
--- a/src/query/automaton_weight.rs
+++ b/src/query/automaton_weight.rs
@@ -14,6 +14,7 @@ use tantivy_fst::Automaton;
 pub struct AutomatonWeight<A>
 where
     A: Automaton + Send + Sync + 'static,
+    A::State: Clone + Default + Sized,
 {
     field: Field,
     automaton: A,
@@ -22,6 +23,7 @@ where
 impl<A> AutomatonWeight<A>
 where
     A: Automaton + Send + Sync + 'static,
+    A::State: Clone + Default + Sized,
 {
     /// Create a new AutomationWeight
     pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> {
@@ -37,6 +39,7 @@ where
 impl<A> Weight for AutomatonWeight<A>
 where
     A: Automaton + Send + Sync + 'static,
+    A::State: Clone + Default + Sized,
 {
     fn scorer(&self, reader: &SegmentReader) -> Result<Box<dyn Scorer>> {
         let max_doc = reader.max_doc();
diff --git a/src/termdict/streamer.rs b/src/termdict/streamer.rs
index e5aecc1d6..0848c25b3 100644
--- a/src/termdict/streamer.rs
+++ b/src/termdict/streamer.rs
@@ -2,7 +2,7 @@ use super::TermDictionary;
 use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
 use tantivy_fst::automaton::AlwaysMatch;
-use tantivy_fst::map::{Stream, StreamBuilder};
+use tantivy_fst::map::{Stream, StreamBuilder, StreamWithState};
 use tantivy_fst::Automaton;
 use tantivy_fst::{IntoStreamer, Streamer};
 
@@ -11,6 +11,7 @@ use tantivy_fst::{IntoStreamer, Streamer};
 pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
 where
     A: Automaton,
+    A::State: Clone,
 {
     fst_map: &'a TermDictionary,
     stream_builder: StreamBuilder<'a, A>,
@@ -19,6 +20,7 @@ where
 impl<'a, A> TermStreamerBuilder<'a, A>
 where
     A: Automaton,
+    A::State: Clone + Default + Sized,
 {
     pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
         TermStreamerBuilder {
@@ -56,10 +58,11 @@ where
     pub fn into_stream(self) -> TermStreamer<'a, A> {
         TermStreamer {
             fst_map: self.fst_map,
-            stream: self.stream_builder.into_stream(),
+            stream: self.stream_builder.with_state().into_stream(),
             term_ord: 0u64,
             current_key: Vec::with_capacity(100),
             current_value: TermInfo::default(),
+            state: Default::default(),
         }
     }
 }
@@ -69,27 +72,31 @@ where
 pub struct TermStreamer<'a, A = AlwaysMatch>
 where
     A: Automaton,
+    A::State: Clone + Default + Sized,
 {
     fst_map: &'a TermDictionary,
-    stream: Stream<'a, A>,
+    stream: StreamWithState<'a, A>,
     term_ord: TermOrdinal,
     current_key: Vec<u8>,
     current_value: TermInfo,
+    state: A::State,
 }
 
 impl<'a, A> TermStreamer<'a, A>
 where
     A: Automaton,
+    A::State: Clone + Default + Sized,
 {
     /// Advance position the stream on the next item.
     /// Before the first call to `.advance()`, the stream
     /// is an unitialized state.
     pub fn advance(&mut self) -> bool {
-        if let Some((term, term_ord)) = self.stream.next() {
+        if let Some((term, term_ord, state)) = self.stream.next() {
             self.current_key.clear();
             self.current_key.extend_from_slice(term);
             self.term_ord = term_ord;
             self.current_value = self.fst_map.term_info_from_ord(term_ord);
+            self.state = state;
             true
         } else {
             false
@@ -118,6 +125,10 @@ where
         &self.current_key
     }
 
+    pub fn state(&self) -> &A::State {
+        &self.state
+    }
+
     /// Accesses the current value.
     ///
     /// Calling `.value()` after the end of the stream will return the
diff --git a/src/termdict/termdict.rs b/src/termdict/termdict.rs
index 6bd47ee62..69dee6e33 100644
--- a/src/termdict/termdict.rs
+++ b/src/termdict/termdict.rs
@@ -197,7 +197,11 @@ impl TermDictionary {
 
     /// Returns a search builder, to stream all of the terms
     /// within the Automaton
-    pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A> {
+    pub fn search<'a, A>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
+    where
+        A: Automaton + 'a,
+        A::State: Clone + Default + Sized,
+    {
         let stream_builder = self.fst_index.search(automaton);
         TermStreamerBuilder::<A>::new(self, stream_builder)
     }