Compare commits

...

1 Commits

Author SHA1 Message Date
Paul Masurel
790baa7adf Integrated state into TermDict streamer 2019-08-16 10:29:28 +09:00
4 changed files with 24 additions and 6 deletions

View File

@@ -17,7 +17,7 @@ base64 = "0.10.0"
byteorder = "1.0"
once_cell = "0.2"
regex = "1.0"
tantivy-fst = "0.1"
tantivy-fst = {git="https://github.com/tantivy-search/fst"}
memmap = {version = "0.7", optional=true}
lz4 = {version="1.20", optional=true}
snap = {version="0.2"}

View File

@@ -14,6 +14,7 @@ use tantivy_fst::Automaton;
pub struct AutomatonWeight<A>
where
A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{
field: Field,
automaton: A,
@@ -22,6 +23,7 @@ where
impl<A> AutomatonWeight<A>
where
A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{
/// Create a new AutomatonWeight
pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> {
@@ -37,6 +39,7 @@ where
impl<A> Weight for AutomatonWeight<A>
where
A: Automaton + Send + Sync + 'static,
A::State: Clone + Default + Sized,
{
fn scorer(&self, reader: &SegmentReader) -> Result<Box<dyn Scorer>> {
let max_doc = reader.max_doc();

View File

@@ -2,7 +2,7 @@ use super::TermDictionary;
use crate::postings::TermInfo;
use crate::termdict::TermOrdinal;
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::map::{Stream, StreamBuilder};
use tantivy_fst::map::{Stream, StreamBuilder, StreamWithState};
use tantivy_fst::Automaton;
use tantivy_fst::{IntoStreamer, Streamer};
@@ -11,6 +11,7 @@ use tantivy_fst::{IntoStreamer, Streamer};
pub struct TermStreamerBuilder<'a, A = AlwaysMatch>
where
A: Automaton,
A::State: Clone,
{
fst_map: &'a TermDictionary,
stream_builder: StreamBuilder<'a, A>,
@@ -19,6 +20,7 @@ where
impl<'a, A> TermStreamerBuilder<'a, A>
where
A: Automaton,
A::State: Clone + Default + Sized,
{
pub(crate) fn new(fst_map: &'a TermDictionary, stream_builder: StreamBuilder<'a, A>) -> Self {
TermStreamerBuilder {
@@ -56,10 +58,11 @@ where
pub fn into_stream(self) -> TermStreamer<'a, A> {
TermStreamer {
fst_map: self.fst_map,
stream: self.stream_builder.into_stream(),
stream: self.stream_builder.with_state().into_stream(),
term_ord: 0u64,
current_key: Vec::with_capacity(100),
current_value: TermInfo::default(),
state: Default::default(),
}
}
}
@@ -69,27 +72,31 @@ where
pub struct TermStreamer<'a, A = AlwaysMatch>
where
A: Automaton,
A::State: Clone + Default + Sized,
{
fst_map: &'a TermDictionary,
stream: Stream<'a, A>,
stream: StreamWithState<'a, A>,
term_ord: TermOrdinal,
current_key: Vec<u8>,
current_value: TermInfo,
state: A::State,
}
impl<'a, A> TermStreamer<'a, A>
where
A: Automaton,
A::State: Clone + Default + Sized,
{
/// Advances the stream to the next item.
/// Before the first call to `.advance()`, the stream
/// is in an uninitialized state.
pub fn advance(&mut self) -> bool {
if let Some((term, term_ord)) = self.stream.next() {
if let Some((term, term_ord, state)) = self.stream.next() {
self.current_key.clear();
self.current_key.extend_from_slice(term);
self.term_ord = term_ord;
self.current_value = self.fst_map.term_info_from_ord(term_ord);
self.state = state;
true
} else {
false
@@ -118,6 +125,10 @@ where
&self.current_key
}
/// Returns the automaton state associated with the current term.
///
/// This is the state yielded by the underlying stream alongside the
/// term during the most recent successful call to `.advance()`.
/// Before the first successful `.advance()`, it holds the value the
/// streamer was built with, i.e. `A::State::default()`.
pub fn state(&self) -> &A::State {
&self.state
}
/// Accesses the current value.
///
/// Calling `.value()` after the end of the stream will return the

View File

@@ -197,7 +197,11 @@ impl TermDictionary {
/// Returns a search builder, to stream all of the terms
/// within the Automaton
pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A> {
pub fn search<'a, A>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
where
A: Automaton + 'a,
// These state bounds mirror the ones on the `TermStreamerBuilder` impl
// that provides `new`, which is called below.
A::State: Clone + Default + Sized,
{
// Start an automaton-driven search on the underlying FST index, then wrap
// the resulting builder together with this dictionary.
let stream_builder = self.fst_index.search(automaton);
TermStreamerBuilder::<A>::new(self, stream_builder)
}