This commit is contained in:
Paul Masurel
2017-09-20 22:56:55 +09:00
parent 426cc436da
commit 2c9302290f
8 changed files with 43 additions and 24 deletions

View File

@@ -30,6 +30,11 @@ impl Default for Token {
}
}
// Warning! TODO: this may change once associated type constructors
// land in nightly.
pub trait Analyzer<'a>: Sized + Clone {
type TokenStreamImpl: TokenStream;

View File

@@ -11,6 +11,14 @@ use analyzer::LowerCaser;
use analyzer::Stemmer;
/// The analyzer manager serves as a store for
/// all of the configured analyzers.
///
/// By default, it is populated with the following analyzers.
///
/// * raw : does not process nor tokenize the text.
/// * default : Tokenizes according to whitespace and punctuation, removes tokens that are too long, and lowercases the tokens.
#[derive(Clone)]
pub struct AnalyzerManager {
analyzers: Arc< RwLock<HashMap<String, Box<BoxedAnalyzer> >> >

View File

@@ -67,8 +67,10 @@ impl Index {
Index::from_directory(directory, schema)
}
pub fn analyzers(&self) -> AnalyzerManager {
self.analyzers.clone()
/// Accessor for the analyzer manager.
pub fn analyzers(&self) -> &AnalyzerManager {
&self.analyzers
}
/// Creates a new index in a temp directory.

View File

@@ -11,7 +11,6 @@ use store::StoreReader;
use directory::ReadOnlySource;
use schema::Document;
use DocId;
use std::str;
use std::sync::Arc;
use std::collections::HashMap;
use common::CompositeFile;

View File

@@ -90,8 +90,8 @@ impl SegmentPostings {
SegmentPostings {
block_cursor: segment_block_postings,
cur: COMPRESSION_BLOCK_SIZE, // cursor within the block
delete_bitset: delete_bitset,
position_computer: position_computer,
delete_bitset,
position_computer,
}
}

View File

@@ -89,10 +89,10 @@ impl QueryParser {
default_fields: Vec<Field>,
analyzer_manager: AnalyzerManager) -> QueryParser {
QueryParser {
schema: schema,
default_fields: default_fields,
schema,
default_fields,
analyzer_manager,
conjunction_by_default: false,
analyzer_manager: analyzer_manager,
}
}
@@ -101,7 +101,7 @@ impl QueryParser {
QueryParser::new(
index.schema(),
default_fields,
index.analyzers())
index.analyzers().clone())
}
/// Set the default way to compose queries to a conjunction.
@@ -223,8 +223,8 @@ impl QueryParser {
match user_input_ast {
UserInputAST::Clause(sub_queries) => {
let default_occur = self.default_occur();
let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(
sub_queries
let logical_sub_queries: Vec<(Occur, LogicalAST)> =
try!(sub_queries
.into_iter()
.map(|sub_query| self.compute_logical_ast_with_occur(*sub_query))
.map(|res| {
@@ -232,24 +232,23 @@ impl QueryParser {
(compose_occur(default_occur, occur), sub_ast)
})
})
.collect()
);
.collect());
Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries)))
}
UserInputAST::Not(subquery) => {
let (occur, logical_sub_queries) =
try!(self.compute_logical_ast_with_occur(*subquery));
self.compute_logical_ast_with_occur(*subquery)?;
Ok((compose_occur(Occur::MustNot, occur), logical_sub_queries))
}
UserInputAST::Must(subquery) => {
let (occur, logical_sub_queries) =
try!(self.compute_logical_ast_with_occur(*subquery));
self.compute_logical_ast_with_occur(*subquery)?;
Ok((compose_occur(Occur::Must, occur), logical_sub_queries))
}
UserInputAST::Leaf(literal) => {
let term_phrases: Vec<(Field, String)> = match literal.field_name {
Some(ref field_name) => {
let field = try!(self.resolve_field_name(field_name));
let field = self.resolve_field_name(field_name)?;
vec![(field, literal.phrase.clone())]
}
None => {

View File

@@ -1,18 +1,24 @@
/// Describing the amount of information indexed.
/// `IndexRecordOption` describes the amount of information associated
/// with a given field.
///
/// It is used in the schema to configure how much data should be
/// indexed for a given field.
///
/// It is also used to describe the amount of information that
/// you want to be decoded as you go through a posting list.
///
/// Since decoding information is not free, this makes it possible to
/// avoid this extra cost when the information is not required.
/// For instance, positions are useful when running phrase queries
/// but useless in other queries.
/// but useless for most queries.
///
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Serialize, Deserialize)]
pub enum IndexRecordOption {
#[serde(rename = "basic")]
Basic,
Basic, //< records only the `DocId`s
#[serde(rename = "freq")]
WithFreqs,
WithFreqs, //< records the document ids as well as the term frequency.
#[serde(rename = "position")]
WithFreqsAndPositions,
WithFreqsAndPositions, //< records the document ids, the term frequency and the positions of the occurrences in the document.
}
impl IndexRecordOption {

View File

@@ -40,7 +40,7 @@ let schema = schema_builder.build();
We can split the problem of generating a search result page into two phases :
* identifying the list of 10 or so documents to be displayed (Conceptually `query -> doc_ids[]`)
* for each of these documents, retrieving the information required to generate the serp page.
* for each of these documents, retrieving the information required to generate the search results page.
(`doc_ids[] -> Document[]`)
In the first phase, the ability to search for documents by the given field is determined by the