Cargo Format (#420)

This commit is contained in:
Paul Masurel
2018-09-15 07:44:22 +09:00
committed by GitHub
parent 0ba1cf93f7
commit 37e4280c0a
71 changed files with 697 additions and 650 deletions

View File

@@ -10,7 +10,6 @@
// - search for the best document matchings "sea whale"
// - retrieve the best document original content.
extern crate tempdir;
// ---
@@ -235,9 +234,7 @@ fn main() -> tantivy::Result<()> {
println!("{}", schema.to_json(&retrieved_doc));
}
Ok(())
}
use tempdir::TempDir;

View File

@@ -3,7 +3,6 @@
// In this example, we'll see how to define a tokenizer pipeline
// by aligning a bunch of `TokenFilter`.
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopCollector;
@@ -12,7 +11,6 @@ use tantivy::schema::*;
use tantivy::tokenizer::NgramTokenizer;
use tantivy::Index;
fn main() -> tantivy::Result<()> {
// # Defining the schema
//

View File

@@ -11,10 +11,9 @@
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopCollector;
use tantivy::query::TermQuery;
use tantivy::schema::*;
use tantivy::Index;
use tantivy::query::TermQuery;
// A simple helper function to fetch a single document
// given its id from our index.
@@ -31,7 +30,7 @@ fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result<Op
let mut top_collector = TopCollector::with_limit(1);
searcher.search(&term_query, &mut top_collector)?;
if let Some(doc_address) = top_collector.docs().first() {
if let Some(doc_address) = top_collector.docs().first() {
let doc = searcher.doc(doc_address)?;
Ok(Some(doc))
} else {
@@ -41,7 +40,6 @@ fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result<Op
}
fn main() -> tantivy::Result<()> {
// # Defining the schema
//
// Check out the *basic_search* example if this makes
@@ -126,7 +124,6 @@ fn main() -> tantivy::Result<()> {
isbn => "978-9176370711",
));
// You are guaranteed that your clients will only observe your index in
// the state it was in after a commit.
// In this example, your search engine will at no point be missing the *Frankenstein* document.
@@ -143,4 +140,4 @@ fn main() -> tantivy::Result<()> {
);
Ok(())
}
}

View File

@@ -22,60 +22,60 @@ use tantivy::schema::*;
use tantivy::Index;
fn main() -> tantivy::Result<()> {
// Let's create a temporary directory for the
// sake of this example
let index_path = TempDir::new("tantivy_facet_example_dir")?;
let mut schema_builder = SchemaBuilder::default();
// Let's create a temporary directory for the
// sake of this example
let index_path = TempDir::new("tantivy_facet_example_dir")?;
let mut schema_builder = SchemaBuilder::default();
schema_builder.add_text_field("name", TEXT | STORED);
schema_builder.add_text_field("name", TEXT | STORED);
// this is our faceted field
schema_builder.add_facet_field("tags");
// this is our faceted field
schema_builder.add_facet_field("tags");
let schema = schema_builder.build();
let schema = schema_builder.build();
let index = Index::create_in_dir(&index_path, schema.clone())?;
let index = Index::create_in_dir(&index_path, schema.clone())?;
let mut index_writer = index.writer(50_000_000)?;
let mut index_writer = index.writer(50_000_000)?;
let name = schema.get_field("name").unwrap();
let tags = schema.get_field("tags").unwrap();
let name = schema.get_field("name").unwrap();
let tags = schema.get_field("tags").unwrap();
// For convenience, tantivy also comes with a macro to
// reduce the boilerplate above.
index_writer.add_document(doc!(
// For convenience, tantivy also comes with a macro to
// reduce the boilerplate above.
index_writer.add_document(doc!(
name => "the ditch",
tags => Facet::from("/pools/north")
));
index_writer.add_document(doc!(
index_writer.add_document(doc!(
name => "little stacey",
tags => Facet::from("/pools/south")
));
index_writer.commit()?;
index_writer.commit()?;
index.load_searchers()?;
index.load_searchers()?;
let searcher = index.searcher();
let searcher = index.searcher();
let mut facet_collector = FacetCollector::for_field(tags);
facet_collector.add_facet("/pools");
let mut facet_collector = FacetCollector::for_field(tags);
facet_collector.add_facet("/pools");
searcher.search(&AllQuery, &mut facet_collector).unwrap();
searcher.search(&AllQuery, &mut facet_collector).unwrap();
let counts = facet_collector.harvest();
// This lists all of the facet counts
let facets: Vec<(&Facet, u64)> = counts.get("/pools").collect();
assert_eq!(
facets,
vec![
(&Facet::from("/pools/north"), 1),
(&Facet::from("/pools/south"), 1)
]
);
let counts = facet_collector.harvest();
// This lists all of the facet counts
let facets: Vec<(&Facet, u64)> = counts.get("/pools").collect();
assert_eq!(
facets,
vec![
(&Facet::from("/pools/north"), 1),
(&Facet::from("/pools/south"), 1),
]
);
Ok(())
Ok(())
}
use tempdir::TempDir;

View File

@@ -7,18 +7,15 @@
// the list of documents containing a term, getting
// its term frequency, and accessing its positions.
// ---
// Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::schema::*;
use tantivy::Index;
use tantivy::{DocSet, DocId, Postings};
use tantivy::{DocId, DocSet, Postings};
fn main() -> tantivy::Result<()> {
// We first create a schema for the sake of the
// example. Check the `basic_search` example for more information.
let mut schema_builder = SchemaBuilder::default();
@@ -47,7 +44,6 @@ fn main() -> tantivy::Result<()> {
// there is actually only one segment here, but let's iterate through the list
// anyway)
for segment_reader in searcher.segment_readers() {
// A segment contains different data structure.
// Inverted index stands for the combination of
// - the term dictionary
@@ -58,19 +54,18 @@ fn main() -> tantivy::Result<()> {
// Let's go through all docs containing the term `title:the` and access their position
let term_the = Term::from_field_text(title, "the");
// This segment posting object is like a cursor over the documents matching the term.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
// and positions.
//
// If you don't need all this information, you may get better performance by decompressing less
// information.
if let Some(mut segment_postings) = inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions) {
if let Some(mut segment_postings) =
inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)
{
// this buffer will be used to request for positions
let mut positions: Vec<u32> = Vec::with_capacity(100);
while segment_postings.advance() {
// the number of time the term appears in the document.
let doc_id: DocId = segment_postings.doc(); //< do not try to access this before calling advance once.
@@ -98,7 +93,6 @@ fn main() -> tantivy::Result<()> {
}
}
// A `Term` is a text token associated with a field.
// Let's go through all docs containing the term `title:the` and access their position
let term_the = Term::from_field_text(title, "the");
@@ -111,7 +105,6 @@ fn main() -> tantivy::Result<()> {
// Also, for some VERY specific high performance use case like an OLAP analysis of logs,
// you can get better performance by accessing directly the blocks of doc ids.
for segment_reader in searcher.segment_readers() {
// A segment contains different data structure.
// Inverted index stands for the combination of
// - the term dictionary
@@ -124,7 +117,9 @@ fn main() -> tantivy::Result<()> {
//
// If you don't need all this information, you may get better performance by decompressing less
// information.
if let Some(mut block_segment_postings) = inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic) {
if let Some(mut block_segment_postings) =
inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)
{
while block_segment_postings.advance() {
// Once again these docs MAY contains deleted documents as well.
let docs = block_segment_postings.docs();
@@ -136,4 +131,3 @@ fn main() -> tantivy::Result<()> {
Ok(())
}

View File

@@ -66,6 +66,6 @@ fn main() -> tantivy::Result<()> {
println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
println!("snippet: {}", snippet.to_html());
}
Ok(())
}

View File

@@ -22,59 +22,59 @@ use tantivy::tokenizer::*;
use tantivy::Index;
fn main() -> tantivy::Result<()> {
// this example assumes you understand the content in `basic_search`
let mut schema_builder = SchemaBuilder::default();
// this example assumes you understand the content in `basic_search`
let mut schema_builder = SchemaBuilder::default();
// This configures your custom options for how tantivy will
// store and process your content in the index; The key
// to note is that we are setting the tokenizer to `stoppy`
// which will be defined and registered below.
let text_field_indexing = TextFieldIndexing::default()
.set_tokenizer("stoppy")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let text_options = TextOptions::default()
.set_indexing_options(text_field_indexing)
.set_stored();
// This configures your custom options for how tantivy will
// store and process your content in the index; The key
// to note is that we are setting the tokenizer to `stoppy`
// which will be defined and registered below.
let text_field_indexing = TextFieldIndexing::default()
.set_tokenizer("stoppy")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let text_options = TextOptions::default()
.set_indexing_options(text_field_indexing)
.set_stored();
// Our first field is title.
schema_builder.add_text_field("title", text_options);
// Our first field is title.
schema_builder.add_text_field("title", text_options);
// Our second field is body.
let text_field_indexing = TextFieldIndexing::default()
.set_tokenizer("stoppy")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let text_options = TextOptions::default()
.set_indexing_options(text_field_indexing)
.set_stored();
schema_builder.add_text_field("body", text_options);
// Our second field is body.
let text_field_indexing = TextFieldIndexing::default()
.set_tokenizer("stoppy")
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
let text_options = TextOptions::default()
.set_indexing_options(text_field_indexing)
.set_stored();
schema_builder.add_text_field("body", text_options);
let schema = schema_builder.build();
let schema = schema_builder.build();
let index = Index::create_in_ram(schema.clone());
let index = Index::create_in_ram(schema.clone());
// This tokenizer lowers all of the text (to help with stop word matching)
// then removes all instances of `the` and `and` from the corpus
let tokenizer = SimpleTokenizer
.filter(LowerCaser)
.filter(StopWordFilter::remove(vec![
"the".to_string(),
"and".to_string(),
]));
// This tokenizer lowers all of the text (to help with stop word matching)
// then removes all instances of `the` and `and` from the corpus
let tokenizer = SimpleTokenizer
.filter(LowerCaser)
.filter(StopWordFilter::remove(vec![
"the".to_string(),
"and".to_string(),
]));
index.tokenizers().register("stoppy", tokenizer);
index.tokenizers().register("stoppy", tokenizer);
let mut index_writer = index.writer(50_000_000)?;
let mut index_writer = index.writer(50_000_000)?;
let title = schema.get_field("title").unwrap();
let body = schema.get_field("body").unwrap();
let title = schema.get_field("title").unwrap();
let body = schema.get_field("body").unwrap();
index_writer.add_document(doc!(
index_writer.add_document(doc!(
title => "The Old Man and the Sea",
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish."
));
index_writer.add_document(doc!(
index_writer.add_document(doc!(
title => "Of Mice and Men",
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
@@ -86,7 +86,7 @@ fn main() -> tantivy::Result<()> {
limbs and branches that arch over the pool"
));
index_writer.add_document(doc!(
index_writer.add_document(doc!(
title => "Frankenstein",
body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
enterprise which you have regarded with such evil forebodings. I arrived here \
@@ -94,28 +94,28 @@ fn main() -> tantivy::Result<()> {
increasing confidence in the success of my undertaking."
));
index_writer.commit()?;
index_writer.commit()?;
index.load_searchers()?;
index.load_searchers()?;
let searcher = index.searcher();
let searcher = index.searcher();
let query_parser = QueryParser::for_index(&index, vec![title, body]);
let query_parser = QueryParser::for_index(&index, vec![title, body]);
// stop words are applied on the query as well.
// The following will be equivalent to `title:frankenstein`
let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
// stop words are applied on the query as well.
// The following will be equivalent to `title:frankenstein`
let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
let mut top_collector = TopCollector::with_limit(10);
let mut top_collector = TopCollector::with_limit(10);
searcher.search(&*query, &mut top_collector)?;
searcher.search(&*query, &mut top_collector)?;
let doc_addresses = top_collector.docs();
let doc_addresses = top_collector.docs();
for doc_address in doc_addresses {
let retrieved_doc = searcher.doc(&doc_address)?;
println!("{}", schema.to_json(&retrieved_doc));
}
for doc_address in doc_addresses {
let retrieved_doc = searcher.doc(&doc_address)?;
println!("{}", schema.to_json(&retrieved_doc));
}
Ok(())
}
Ok(())
}