Compare commits

...

1 Commits

Author SHA1 Message Date
Paul Masurel
790baa7adf Integrated state into TermDict streamer 2019-08-16 10:29:28 +09:00
4 changed files with 24 additions and 6 deletions

View File

@@ -17,7 +17,7 @@ base64 = "0.10.0"
byteorder = "1.0" byteorder = "1.0"
once_cell = "0.2" once_cell = "0.2"
regex = "1.0" regex = "1.0"
tantivy-fst = "0.1" tantivy-fst = {git="https://github.com/tantivy-search/fst"}
memmap = {version = "0.7", optional=true} memmap = {version = "0.7", optional=true}
lz4 = {version="1.20", optional=true} lz4 = {version="1.20", optional=true}
snap = {version="0.2"} snap = {version="0.2"}

View File

@@ -14,6 +14,7 @@ use tantivy_fst::Automaton;
pub struct AutomatonWeight<A> pub struct AutomatonWeight<A>
where where
A: Automaton + Send + Sync + 'static, A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{ {
field: Field, field: Field,
automaton: A, automaton: A,
@@ -22,6 +23,7 @@ where
impl<A> AutomatonWeight<A> impl<A> AutomatonWeight<A>
where where
A: Automaton + Send + Sync + 'static, A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{ {
/// Create a new AutomatonWeight /// Create a new AutomatonWeight
pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> { pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> {
@@ -37,6 +39,7 @@ where
impl<A> Weight for AutomatonWeight<A> impl<A> Weight for AutomatonWeight<A>
where where
A: Automaton + Send + Sync + 'static, A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{ {
fn scorer(&self, reader: &SegmentReader) -> Result<Box<dyn Scorer>> { fn scorer(&self, reader: &SegmentReader) -> Result<Box<dyn Scorer>> {
let max_doc = reader.max_doc(); let max_doc = reader.max_doc();

View File

@@ -2,7 +2,7 @@ use super::TermDictionary;
use crate::postings::TermInfo; use crate::postings::TermInfo;
use crate::termdict::TermOrdinal; use crate::termdict::TermOrdinal;
use tantivy_fst::automaton::AlwaysMatch; use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::map::{Stream, StreamBuilder}; use tantivy_fst::map::{Stream, StreamBuilder, StreamWithState};
use tantivy_fst::Automaton; use tantivy_fst::Automaton;
use tantivy_fst::{IntoStreamer, Streamer}; use tantivy_fst::{IntoStreamer, Streamer};
@@ -11,6 +11,7 @@ use tantivy_fst::{IntoStreamer, Streamer};
pub struct TermStreamerBuilder<'a, A = AlwaysMatch> pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
where where
A: Automaton, A: Automaton,
A::State: Clone,
{ {
fst_map: &'a TermDictionary, fst_map: &'a TermDictionary,
stream_builder: StreamBuilder<'a, A>, stream_builder: StreamBuilder<'a, A>,
@@ -19,6 +20,7 @@ where
impl<'a, A> TermStreamerBuilder<'a, A> impl<'a, A> TermStreamerBuilder<'a, A>
where where
A: Automaton, A: Automaton,
A::State: Clone + Default + Sized,
{ {
pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self { pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
TermStreamerBuilder { TermStreamerBuilder {
@@ -56,10 +58,11 @@ where
pub fn into_stream(self) -> TermStreamer<'a, A> { pub fn into_stream(self) -> TermStreamer<'a, A> {
TermStreamer { TermStreamer {
fst_map: self.fst_map, fst_map: self.fst_map,
stream: self.stream_builder.into_stream(), stream: self.stream_builder.with_state().into_stream(),
term_ord: 0u64, term_ord: 0u64,
current_key: Vec::with_capacity(100), current_key: Vec::with_capacity(100),
current_value: TermInfo::default(), current_value: TermInfo::default(),
state: Default::default(),
} }
} }
} }
@@ -69,27 +72,31 @@ where
pub struct TermStreamer<'a, A = AlwaysMatch> pub struct TermStreamer<'a, A = AlwaysMatch>
where where
A: Automaton, A: Automaton,
A::State: Clone + Default + Sized,
{ {
fst_map: &'a TermDictionary, fst_map: &'a TermDictionary,
stream: Stream<'a, A>, stream: StreamWithState<'a, A>,
term_ord: TermOrdinal, term_ord: TermOrdinal,
current_key: Vec<u8>, current_key: Vec<u8>,
current_value: TermInfo, current_value: TermInfo,
state: A::State,
} }
impl<'a, A> TermStreamer<'a, A> impl<'a, A> TermStreamer<'a, A>
where where
A: Automaton, A: Automaton,
A::State: Clone + Default + Sized,
{ {
/// Advances the stream to the next item. /// Advances the stream to the next item.
/// Before the first call to `.advance()`, the stream /// Before the first call to `.advance()`, the stream
/// is in an uninitialized state. /// is in an uninitialized state.
pub fn advance(&mut self) -> bool { pub fn advance(&mut self) -> bool {
if let Some((term, term_ord)) = self.stream.next() { if let Some((term, term_ord, state)) = self.stream.next() {
self.current_key.clear(); self.current_key.clear();
self.current_key.extend_from_slice(term); self.current_key.extend_from_slice(term);
self.term_ord = term_ord; self.term_ord = term_ord;
self.current_value = self.fst_map.term_info_from_ord(term_ord); self.current_value = self.fst_map.term_info_from_ord(term_ord);
self.state = state;
true true
} else { } else {
false false
@@ -118,6 +125,10 @@ where
&self.current_key &self.current_key
} }
/// Returns the automaton state stored by the last call to `.advance()`
/// that returned `true`.
///
/// Until `.advance()` has succeeded at least once, this is the
/// `Default::default()` value the streamer was created with. A call to
/// `.advance()` that returns `false` leaves the stored state untouched.
pub fn state(&self) -> &A::State {
&self.state
}
/// Accesses the current value. /// Accesses the current value.
/// ///
/// Calling `.value()` after the end of the stream will return the /// Calling `.value()` after the end of the stream will return the

View File

@@ -197,7 +197,11 @@ impl TermDictionary {
/// Returns a search builder, to stream all of the terms /// Returns a search builder, to stream all of the terms
/// within the Automaton /// within the Automaton
pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A> { pub fn search<'a, A>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
where
A: Automaton + 'a,
A::State: Clone + Default + Sized,
{
let stream_builder = self.fst_index.search(automaton); let stream_builder = self.fst_index.search(automaton);
TermStreamerBuilder::<A>::new(self, stream_builder) TermStreamerBuilder::<A>::new(self, stream_builder)
} }