mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 15:52:55 +00:00
clippy first stab (#711)
This commit is contained in:
@@ -13,44 +13,29 @@ use crate::SegmentReader;
|
||||
/// use tantivy::collector::Count;
|
||||
/// use tantivy::query::QueryParser;
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{doc, Index, Result};
|
||||
/// use tantivy::{doc, Index};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
///
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
/// let mut index_writer = index.writer(3_000_000).unwrap();
|
||||
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
||||
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
|
||||
/// assert!(index_writer.commit().is_ok());
|
||||
///
|
||||
/// {
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// let count = searcher.search(&query, &Count).unwrap();
|
||||
/// let reader = index.reader().unwrap();
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// assert_eq!(count, 2);
|
||||
/// }
|
||||
/// // Here comes the important part
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary").unwrap();
|
||||
/// let count = searcher.search(&query, &Count).unwrap();
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// assert_eq!(count, 2);
|
||||
/// ```
|
||||
pub struct Count;
|
||||
|
||||
|
||||
@@ -86,7 +86,6 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
/// use tantivy::schema::{Facet, Schema, TEXT};
|
||||
/// use tantivy::{doc, Index, Result};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
///
|
||||
@@ -172,6 +171,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// # assert!(example().is_ok());
|
||||
/// ```
|
||||
pub struct FacetCollector {
|
||||
field: Field,
|
||||
|
||||
@@ -108,49 +108,35 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
|
||||
/// use tantivy::collector::{Count, TopDocs, MultiCollector};
|
||||
/// use tantivy::query::QueryParser;
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{doc, Index, Result};
|
||||
/// use tantivy::{doc, Index};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
///
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
/// let mut index_writer = index.writer(3_000_000).unwrap();
|
||||
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
||||
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
|
||||
/// assert!(index_writer.commit().is_ok());
|
||||
///
|
||||
/// let mut collectors = MultiCollector::new();
|
||||
/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
|
||||
/// let count_handle = collectors.add_collector(Count);
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// let mut multi_fruit = searcher.search(&query, &collectors)?;
|
||||
/// let reader = index.reader().unwrap();
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// let count = count_handle.extract(&mut multi_fruit);
|
||||
/// let top_docs = top_docs_handle.extract(&mut multi_fruit);
|
||||
/// let mut collectors = MultiCollector::new();
|
||||
/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2));
|
||||
/// let count_handle = collectors.add_collector(Count);
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary").unwrap();
|
||||
/// let mut multi_fruit = searcher.search(&query, &collectors).unwrap();
|
||||
///
|
||||
/// # assert_eq!(count, 2);
|
||||
/// # assert_eq!(top_docs.len(), 2);
|
||||
/// let count = count_handle.extract(&mut multi_fruit);
|
||||
/// let top_docs = top_docs_handle.extract(&mut multi_fruit);
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// assert_eq!(count, 2);
|
||||
/// assert_eq!(top_docs.len(), 2);
|
||||
/// ```
|
||||
#[allow(clippy::type_complexity)]
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -29,43 +29,29 @@ use std::fmt;
|
||||
/// use tantivy::collector::TopDocs;
|
||||
/// use tantivy::query::QueryParser;
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{doc, DocAddress, Index, Result};
|
||||
/// use tantivy::{doc, DocAddress, Index};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
///
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
/// index_writer.add_document(doc!(title => "The Name of the Wind"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of Muadib"));
|
||||
/// index_writer.add_document(doc!(title => "A Dairy Cow"));
|
||||
/// index_writer.add_document(doc!(title => "The Diary of a Young Girl"));
|
||||
/// assert!(index_writer.commit().is_ok());
|
||||
///
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary")?;
|
||||
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?;
|
||||
/// let reader = index.reader().unwrap();
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// assert_eq!(&top_docs[0], &(0.7261542, DocAddress(0, 1)));
|
||||
/// assert_eq!(&top_docs[1], &(0.6099695, DocAddress(0, 3)));
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
/// let query = query_parser.parse_query("diary").unwrap();
|
||||
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// assert_eq!(&top_docs[0], &(0.7261542, DocAddress(0, 1)));
|
||||
/// assert_eq!(&top_docs[1], &(0.6099695, DocAddress(0, 3)));
|
||||
/// ```
|
||||
pub struct TopDocs(TopCollector<Score>);
|
||||
|
||||
@@ -102,15 +88,12 @@ impl TopDocs {
|
||||
/// #
|
||||
/// # let index = Index::create_in_ram(schema);
|
||||
/// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
||||
/// # index_writer.add_document(doc!(
|
||||
/// # title => "The Name of the Wind",
|
||||
/// # rating => 92u64,
|
||||
/// # ));
|
||||
/// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64));
|
||||
/// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64));
|
||||
/// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64));
|
||||
/// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64));
|
||||
/// # index_writer.commit()?;
|
||||
/// # let reader = index.reader()?;
|
||||
/// # assert!(index_writer.commit().is_ok());
|
||||
/// # let reader = index.reader().unwrap();
|
||||
/// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?;
|
||||
/// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?;
|
||||
/// # assert_eq!(top_docs,
|
||||
@@ -202,27 +185,33 @@ impl TopDocs {
|
||||
/// use tantivy::collector::TopDocs;
|
||||
/// use tantivy::schema::Field;
|
||||
///
|
||||
/// # fn create_schema() -> Schema {
|
||||
/// # let mut schema_builder = Schema::builder();
|
||||
/// # schema_builder.add_text_field("product_name", TEXT);
|
||||
/// # schema_builder.add_u64_field("popularity", FAST);
|
||||
/// # schema_builder.build()
|
||||
/// # }
|
||||
/// #
|
||||
/// # fn main() -> tantivy::Result<()> {
|
||||
/// # let schema = create_schema();
|
||||
/// # let index = Index::create_in_ram(schema);
|
||||
/// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
||||
/// # let product_name = index.schema().get_field("product_name").unwrap();
|
||||
/// #
|
||||
/// fn create_schema() -> Schema {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// schema_builder.add_text_field("product_name", TEXT);
|
||||
/// schema_builder.add_u64_field("popularity", FAST);
|
||||
/// schema_builder.build()
|
||||
/// }
|
||||
///
|
||||
/// fn create_index() -> tantivy::Result<Index> {
|
||||
/// let schema = create_schema();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
|
||||
/// let product_name = index.schema().get_field("product_name").unwrap();
|
||||
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
|
||||
/// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
|
||||
/// index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64));
|
||||
/// index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64));
|
||||
/// index_writer.commit()?;
|
||||
/// Ok(index)
|
||||
/// }
|
||||
///
|
||||
/// let index = create_index().unwrap();
|
||||
/// let product_name = index.schema().get_field("product_name").unwrap();
|
||||
/// let popularity: Field = index.schema().get_field("popularity").unwrap();
|
||||
/// # index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64));
|
||||
/// # index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64));
|
||||
/// # index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64));
|
||||
/// # index_writer.commit()?;
|
||||
/// // ...
|
||||
/// # let user_query = "diary";
|
||||
/// # let query = QueryParser::for_index(&index, vec![product_name]).parse_query(user_query)?;
|
||||
///
|
||||
/// let user_query_str = "diary";
|
||||
/// let query_parser = QueryParser::for_index(&index, vec![product_name]);
|
||||
/// let query = query_parser.parse_query(user_query_str).unwrap();
|
||||
///
|
||||
/// // This is where we build our collector with our custom score.
|
||||
/// let top_docs_by_custom_score = TopDocs
|
||||
@@ -249,15 +238,12 @@ impl TopDocs {
|
||||
/// popularity_boost_score * original_score
|
||||
/// }
|
||||
/// });
|
||||
/// # let reader = index.reader()?;
|
||||
/// # let searcher = reader.searcher();
|
||||
/// let reader = index.reader().unwrap();
|
||||
/// let searcher = reader.searcher();
|
||||
/// // ... and here are our documents. Note this is a simple vec.
|
||||
/// // The `Score` in the pair is our tweaked score.
|
||||
/// let resulting_docs: Vec<(Score, DocAddress)> =
|
||||
/// searcher.search(&*query, &top_docs_by_custom_score)?;
|
||||
///
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// searcher.search(&query, &top_docs_by_custom_score).unwrap();
|
||||
/// ```
|
||||
///
|
||||
/// # See also
|
||||
|
||||
@@ -191,7 +191,7 @@ impl Directory for RAMDirectory {
|
||||
// Reserve the path to prevent calls to .write() from succeeding.
|
||||
self.fs.write().unwrap().write(path_buf.clone(), &[]);
|
||||
|
||||
let mut vec_writer = VecWriter::new(path_buf.clone(), self.clone());
|
||||
let mut vec_writer = VecWriter::new(path_buf, self.clone());
|
||||
vec_writer.write_all(data)?;
|
||||
vec_writer.flush()?;
|
||||
if path == Path::new(&*META_FILEPATH) {
|
||||
|
||||
@@ -138,7 +138,7 @@ fn merge(
|
||||
|
||||
let segment_meta = index.new_segment_meta(merged_segment.id(), num_docs);
|
||||
|
||||
Ok(SegmentEntry::new(segment_meta.clone(), delete_cursor, None))
|
||||
Ok(SegmentEntry::new(segment_meta, delete_cursor, None))
|
||||
}
|
||||
|
||||
pub(crate) struct InnerSegmentUpdater {
|
||||
|
||||
@@ -54,21 +54,21 @@ where
|
||||
match self.excluding_state {
|
||||
State::ExcludeOne(excluded_doc) => {
|
||||
if doc == excluded_doc {
|
||||
false
|
||||
} else if excluded_doc > doc {
|
||||
true
|
||||
} else {
|
||||
match self.excluding_docset.skip_next(doc) {
|
||||
SkipResult::OverStep => {
|
||||
self.excluding_state = State::ExcludeOne(self.excluding_docset.doc());
|
||||
true
|
||||
}
|
||||
SkipResult::End => {
|
||||
self.excluding_state = State::Finished;
|
||||
true
|
||||
}
|
||||
SkipResult::Reached => false,
|
||||
return false;
|
||||
}
|
||||
if excluded_doc > doc {
|
||||
return true;
|
||||
}
|
||||
match self.excluding_docset.skip_next(doc) {
|
||||
SkipResult::OverStep => {
|
||||
self.excluding_state = State::ExcludeOne(self.excluding_docset.doc());
|
||||
true
|
||||
}
|
||||
SkipResult::End => {
|
||||
self.excluding_state = State::Finished;
|
||||
true
|
||||
}
|
||||
SkipResult::Reached => false,
|
||||
}
|
||||
}
|
||||
State::Finished => true,
|
||||
|
||||
@@ -33,7 +33,6 @@ static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Laz
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{doc, Index, Result, Term};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
@@ -59,7 +58,6 @@ static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Laz
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// {
|
||||
///
|
||||
/// let term = Term::from_field_text(title, "Diary");
|
||||
/// let query = FuzzyTermQuery::new(term, 1, true);
|
||||
/// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count)).unwrap();
|
||||
@@ -69,6 +67,7 @@ static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Laz
|
||||
///
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// # assert!(example().is_ok());
|
||||
/// ```
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FuzzyTermQuery {
|
||||
|
||||
@@ -4,6 +4,7 @@ use crate::postings::Postings;
|
||||
use crate::query::bm25::BM25Weight;
|
||||
use crate::query::{Intersection, Scorer};
|
||||
use crate::DocId;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
struct PostingsWithOffset<TPostings> {
|
||||
offset: u32,
|
||||
@@ -59,12 +60,16 @@ fn intersection_exists(left: &[u32], right: &[u32]) -> bool {
|
||||
while left_i < left.len() && right_i < right.len() {
|
||||
let left_val = left[left_i];
|
||||
let right_val = right[right_i];
|
||||
if left_val < right_val {
|
||||
left_i += 1;
|
||||
} else if right_val < left_val {
|
||||
right_i += 1;
|
||||
} else {
|
||||
return true;
|
||||
match left_val.cmp(&right_val) {
|
||||
Ordering::Less => {
|
||||
left_i += 1;
|
||||
}
|
||||
Ordering::Equal => {
|
||||
return true;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
right_i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
@@ -77,14 +82,18 @@ fn intersection_count(left: &[u32], right: &[u32]) -> usize {
|
||||
while left_i < left.len() && right_i < right.len() {
|
||||
let left_val = left[left_i];
|
||||
let right_val = right[right_i];
|
||||
if left_val < right_val {
|
||||
left_i += 1;
|
||||
} else if right_val < left_val {
|
||||
right_i += 1;
|
||||
} else {
|
||||
count += 1;
|
||||
left_i += 1;
|
||||
right_i += 1;
|
||||
match left_val.cmp(&right_val) {
|
||||
Ordering::Less => {
|
||||
left_i += 1;
|
||||
}
|
||||
Ordering::Equal => {
|
||||
count += 1;
|
||||
left_i += 1;
|
||||
right_i += 1;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
right_i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
count
|
||||
@@ -103,15 +112,19 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize {
|
||||
while left_i < left_len && right_i < right_len {
|
||||
let left_val = left[left_i];
|
||||
let right_val = right[right_i];
|
||||
if left_val < right_val {
|
||||
left_i += 1;
|
||||
} else if right_val < left_val {
|
||||
right_i += 1;
|
||||
} else {
|
||||
left[count] = left_val;
|
||||
count += 1;
|
||||
left_i += 1;
|
||||
right_i += 1;
|
||||
match left_val.cmp(&right_val) {
|
||||
Ordering::Less => {
|
||||
left_i += 1;
|
||||
}
|
||||
Ordering::Equal => {
|
||||
left[count] = left_val;
|
||||
count += 1;
|
||||
left_i += 1;
|
||||
right_i += 1;
|
||||
}
|
||||
Ordering::Greater => {
|
||||
right_i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
count
|
||||
|
||||
@@ -38,41 +38,33 @@ fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// # use tantivy::collector::Count;
|
||||
/// # use tantivy::query::RangeQuery;
|
||||
/// # use tantivy::schema::{Schema, INDEXED};
|
||||
/// # use tantivy::{doc, Index, Result};
|
||||
/// #
|
||||
/// # fn run() -> Result<()> {
|
||||
/// # let mut schema_builder = Schema::builder();
|
||||
/// # let year_field = schema_builder.add_u64_field("year", INDEXED);
|
||||
/// # let schema = schema_builder.build();
|
||||
/// #
|
||||
/// # let index = Index::create_in_ram(schema);
|
||||
/// # {
|
||||
/// # let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
|
||||
/// # for year in 1950u64..2017u64 {
|
||||
/// # let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
||||
/// # for _ in 0..num_docs_within_year {
|
||||
/// # index_writer.add_document(doc!(year_field => year));
|
||||
/// # }
|
||||
/// # }
|
||||
/// # index_writer.commit().unwrap();
|
||||
/// # }
|
||||
/// # let reader = index.reader()?;
|
||||
/// use tantivy::collector::Count;
|
||||
/// use tantivy::query::RangeQuery;
|
||||
/// use tantivy::schema::{Schema, INDEXED};
|
||||
/// use tantivy::{doc, Index};
|
||||
/// # fn test() -> tantivy::Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let year_field = schema_builder.add_u64_field("year", INDEXED);
|
||||
/// let schema = schema_builder.build();
|
||||
///
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?;
|
||||
/// for year in 1950u64..2017u64 {
|
||||
/// let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
|
||||
/// for _ in 0..num_docs_within_year {
|
||||
/// index_writer.add_document(doc!(year_field => year));
|
||||
/// }
|
||||
/// }
|
||||
/// index_writer.commit()?;
|
||||
///
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970);
|
||||
///
|
||||
/// let num_60s_books = searcher.search(&docs_in_the_sixties, &Count)?;
|
||||
///
|
||||
/// # assert_eq!(num_60s_books, 2285);
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # run().unwrap()
|
||||
/// assert_eq!(num_60s_books, 2285);
|
||||
/// Ok(())
|
||||
/// # }
|
||||
/// # assert!(test().is_ok());
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RangeQuery {
|
||||
|
||||
@@ -15,40 +15,40 @@ use tantivy_fst::Regex;
|
||||
/// use tantivy::collector::Count;
|
||||
/// use tantivy::query::RegexQuery;
|
||||
/// use tantivy::schema::{Schema, TEXT};
|
||||
/// use tantivy::{doc, Index, Result, Term};
|
||||
/// use tantivy::{doc, Index, Term};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
///
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// let term = Term::from_field_text(title, "Diary");
|
||||
/// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?;
|
||||
/// let count = searcher.search(&query, &Count)?;
|
||||
/// assert_eq!(count, 3);
|
||||
/// Ok(())
|
||||
/// # fn test() -> tantivy::Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit().unwrap();
|
||||
/// }
|
||||
///
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// let term = Term::from_field_text(title, "Diary");
|
||||
/// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?;
|
||||
/// let count = searcher.search(&query, &Count)?;
|
||||
/// assert_eq!(count, 3);
|
||||
/// Ok(())
|
||||
/// # }
|
||||
/// # assert!(test().is_ok());
|
||||
/// ```
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RegexQuery {
|
||||
|
||||
@@ -23,42 +23,39 @@ use std::fmt;
|
||||
/// use tantivy::collector::{Count, TopDocs};
|
||||
/// use tantivy::query::TermQuery;
|
||||
/// use tantivy::schema::{Schema, TEXT, IndexRecordOption};
|
||||
/// use tantivy::{doc, Index, Result, Term};
|
||||
///
|
||||
/// # fn main() { example().unwrap(); }
|
||||
/// fn example() -> Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit()?;
|
||||
/// }
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
///
|
||||
/// let query = TermQuery::new(
|
||||
/// Term::from_field_text(title, "diary"),
|
||||
/// IndexRecordOption::Basic,
|
||||
/// );
|
||||
/// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count)).unwrap();
|
||||
/// assert_eq!(count, 2);
|
||||
///
|
||||
/// Ok(())
|
||||
/// use tantivy::{doc, Index, Term};
|
||||
/// # fn test() -> tantivy::Result<()> {
|
||||
/// let mut schema_builder = Schema::builder();
|
||||
/// let title = schema_builder.add_text_field("title", TEXT);
|
||||
/// let schema = schema_builder.build();
|
||||
/// let index = Index::create_in_ram(schema);
|
||||
/// {
|
||||
/// let mut index_writer = index.writer(3_000_000)?;
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Name of the Wind",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of Muadib",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "A Dairy Cow",
|
||||
/// ));
|
||||
/// index_writer.add_document(doc!(
|
||||
/// title => "The Diary of a Young Girl",
|
||||
/// ));
|
||||
/// index_writer.commit()?;
|
||||
/// }
|
||||
/// let reader = index.reader()?;
|
||||
/// let searcher = reader.searcher();
|
||||
/// let query = TermQuery::new(
|
||||
/// Term::from_field_text(title, "diary"),
|
||||
/// IndexRecordOption::Basic,
|
||||
/// );
|
||||
/// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
|
||||
/// assert_eq!(count, 2);
|
||||
/// Ok(())
|
||||
/// # }
|
||||
/// # assert!(test().is_ok());
|
||||
/// ```
|
||||
#[derive(Clone)]
|
||||
pub struct TermQuery {
|
||||
|
||||
@@ -53,7 +53,7 @@ where
|
||||
fn bitor(self, head: SchemaFlagList<Head, ()>) -> Self::Output {
|
||||
SchemaFlagList {
|
||||
head: head.head,
|
||||
tail: self.clone(),
|
||||
tail: self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
//! ```rust
|
||||
//! use tantivy::tokenizer::*;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//!
|
||||
//! let tokenizer = RawTokenizer
|
||||
//! .filter(AlphaNumOnlyFilter);
|
||||
//!
|
||||
@@ -20,7 +18,6 @@
|
||||
//! assert!(stream.next().is_some());
|
||||
//! // the "emoji" is dropped because it's not an alphanum
|
||||
//! assert!(stream.next().is_none());
|
||||
//! # }
|
||||
//! ```
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
//! ```rust
|
||||
//! use tantivy::schema::*;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! let mut schema_builder = Schema::builder();
|
||||
//!
|
||||
//! let text_options = TextOptions::default()
|
||||
@@ -31,7 +30,6 @@
|
||||
//! schema_builder.add_text_field("uuid", id_options);
|
||||
//!
|
||||
//! let schema = schema_builder.build();
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! By default, `tantivy` offers the following tokenizers:
|
||||
@@ -66,12 +64,10 @@
|
||||
//! ```rust
|
||||
//! use tantivy::tokenizer::*;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! let en_stem = SimpleTokenizer
|
||||
//! .filter(RemoveLongFilter::limit(40))
|
||||
//! .filter(LowerCaser)
|
||||
//! .filter(Stemmer::new(Language::English));
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! Once your tokenizer is defined, you need to
|
||||
@@ -81,13 +77,12 @@
|
||||
//! # use tantivy::schema::Schema;
|
||||
//! # use tantivy::tokenizer::*;
|
||||
//! # use tantivy::Index;
|
||||
//! # fn main() {
|
||||
//! # let custom_en_tokenizer = SimpleTokenizer;
|
||||
//! #
|
||||
//! let custom_en_tokenizer = SimpleTokenizer;
|
||||
//! # let schema = Schema::builder().build();
|
||||
//! let index = Index::create_in_ram(schema);
|
||||
//! index.tokenizers()
|
||||
//! .register("custom_en", custom_en_tokenizer);
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! If you built your schema programmatically, a complete example
|
||||
@@ -102,7 +97,6 @@
|
||||
//! use tantivy::tokenizer::*;
|
||||
//! use tantivy::Index;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! let mut schema_builder = Schema::builder();
|
||||
//! let text_field_indexing = TextFieldIndexing::default()
|
||||
//! .set_tokenizer("custom_en")
|
||||
@@ -121,8 +115,6 @@
|
||||
//! index
|
||||
//! .tokenizers()
|
||||
//! .register("custom_en", custom_en_tokenizer);
|
||||
//! // ...
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
mod alphanum_only;
|
||||
|
||||
@@ -31,7 +31,7 @@ use super::{Token, TokenStream, Tokenizer};
|
||||
///
|
||||
/// ```rust
|
||||
/// use tantivy::tokenizer::*;
|
||||
/// # fn main() {
|
||||
///
|
||||
/// let tokenizer = NgramTokenizer::new(2, 3, false);
|
||||
/// let mut stream = tokenizer.token_stream("hello");
|
||||
/// {
|
||||
@@ -77,7 +77,6 @@ use super::{Token, TokenStream, Tokenizer};
|
||||
/// assert_eq!(token.offset_to, 5);
|
||||
/// }
|
||||
/// assert!(stream.next().is_none());
|
||||
/// # }
|
||||
/// ```
|
||||
#[derive(Clone)]
|
||||
pub struct NgramTokenizer {
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
//! ```rust
|
||||
//! use tantivy::tokenizer::*;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//!
|
||||
//! let tokenizer = SimpleTokenizer
|
||||
//! .filter(RemoveLongFilter::limit(5));
|
||||
//!
|
||||
@@ -12,7 +10,6 @@
|
||||
//! // out of the token stream.
|
||||
//! assert_eq!(stream.next().unwrap().text, "nice");
|
||||
//! assert!(stream.next().is_none());
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
//! ```rust
|
||||
//! use tantivy::tokenizer::*;
|
||||
//!
|
||||
//! # fn main() {
|
||||
//! let tokenizer = SimpleTokenizer
|
||||
//! .filter(StopWordFilter::remove(vec!["the".to_string(), "is".to_string()]));
|
||||
//!
|
||||
@@ -10,7 +9,6 @@
|
||||
//! assert_eq!(stream.next().unwrap().text, "fox");
|
||||
//! assert_eq!(stream.next().unwrap().text, "crafty");
|
||||
//! assert!(stream.next().is_none());
|
||||
//! # }
|
||||
//! ```
|
||||
use super::{Token, TokenFilter, TokenStream};
|
||||
use fnv::FnvHasher;
|
||||
@@ -46,7 +44,7 @@ impl StopWordFilter {
|
||||
"there", "these", "they", "this", "to", "was", "will", "with",
|
||||
];
|
||||
|
||||
StopWordFilter::remove(words.iter().map(|s| s.to_string()).collect())
|
||||
StopWordFilter::remove(words.iter().map(|&s| s.to_string()).collect())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -58,12 +58,10 @@ pub trait Tokenizer<'a>: Sized + Clone {
|
||||
/// ```rust
|
||||
/// use tantivy::tokenizer::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// let en_stem = SimpleTokenizer
|
||||
/// .filter(RemoveLongFilter::limit(40))
|
||||
/// .filter(LowerCaser)
|
||||
/// .filter(Stemmer::default());
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
fn filter<NewFilter>(self, new_filter: NewFilter) -> ChainTokenizer<NewFilter, Self>
|
||||
@@ -188,7 +186,6 @@ impl<'b> TokenStream for Box<dyn TokenStream + 'b> {
|
||||
/// ```
|
||||
/// use tantivy::tokenizer::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// let tokenizer = SimpleTokenizer
|
||||
/// .filter(RemoveLongFilter::limit(40))
|
||||
/// .filter(LowerCaser);
|
||||
@@ -207,7 +204,6 @@ impl<'b> TokenStream for Box<dyn TokenStream + 'b> {
|
||||
/// assert_eq!(token.offset_to, 12);
|
||||
/// assert_eq!(token.position, 1);
|
||||
/// }
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
pub trait TokenStream {
|
||||
@@ -227,17 +223,15 @@ pub trait TokenStream {
|
||||
/// and `.token()`.
|
||||
///
|
||||
/// ```
|
||||
/// # use tantivy::tokenizer::*;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # let tokenizer = SimpleTokenizer
|
||||
/// # .filter(RemoveLongFilter::limit(40))
|
||||
/// # .filter(LowerCaser);
|
||||
/// use tantivy::tokenizer::*;
|
||||
///
|
||||
/// let tokenizer = SimpleTokenizer
|
||||
/// .filter(RemoveLongFilter::limit(40))
|
||||
/// .filter(LowerCaser);
|
||||
/// let mut token_stream = tokenizer.token_stream("Hello, happy tax payer");
|
||||
/// while let Some(token) = token_stream.next() {
|
||||
/// println!("Token {:?}", token.text);
|
||||
/// }
|
||||
/// # }
|
||||
/// ```
|
||||
fn next(&mut self) -> Option<&Token> {
|
||||
if self.advance() {
|
||||
|
||||
Reference in New Issue
Block a user