mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 17:22:54 +00:00
Cargo Format (#420)
This commit is contained in:
@@ -10,7 +10,6 @@
|
||||
// - search for the best document matchings "sea whale"
|
||||
// - retrieve the best document original content.
|
||||
|
||||
|
||||
extern crate tempdir;
|
||||
|
||||
// ---
|
||||
@@ -235,9 +234,7 @@ fn main() -> tantivy::Result<()> {
|
||||
println!("{}", schema.to_json(&retrieved_doc));
|
||||
}
|
||||
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
use tempdir::TempDir;
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
// In this example, we'll see how to define a tokenizer pipeline
|
||||
// by aligning a bunch of `TokenFilter`.
|
||||
|
||||
|
||||
#[macro_use]
|
||||
extern crate tantivy;
|
||||
use tantivy::collector::TopCollector;
|
||||
@@ -12,7 +11,6 @@ use tantivy::schema::*;
|
||||
use tantivy::tokenizer::NgramTokenizer;
|
||||
use tantivy::Index;
|
||||
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// # Defining the schema
|
||||
//
|
||||
|
||||
@@ -11,10 +11,9 @@
|
||||
#[macro_use]
|
||||
extern crate tantivy;
|
||||
use tantivy::collector::TopCollector;
|
||||
use tantivy::query::TermQuery;
|
||||
use tantivy::schema::*;
|
||||
use tantivy::Index;
|
||||
use tantivy::query::TermQuery;
|
||||
|
||||
|
||||
// A simple helper function to fetch a single document
|
||||
// given its id from our index.
|
||||
@@ -31,7 +30,7 @@ fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result<Op
|
||||
let mut top_collector = TopCollector::with_limit(1);
|
||||
searcher.search(&term_query, &mut top_collector)?;
|
||||
|
||||
if let Some(doc_address) = top_collector.docs().first() {
|
||||
if let Some(doc_address) = top_collector.docs().first() {
|
||||
let doc = searcher.doc(doc_address)?;
|
||||
Ok(Some(doc))
|
||||
} else {
|
||||
@@ -41,7 +40,6 @@ fn extract_doc_given_isbn(index: &Index, isbn_term: &Term) -> tantivy::Result<Op
|
||||
}
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
|
||||
// # Defining the schema
|
||||
//
|
||||
// Check out the *basic_search* example if this makes
|
||||
@@ -126,7 +124,6 @@ fn main() -> tantivy::Result<()> {
|
||||
isbn => "978-9176370711",
|
||||
));
|
||||
|
||||
|
||||
// You are guaranteed that your clients will only observe your index in
|
||||
// the state it was in after a commit.
|
||||
// In this example, your search engine will at no point be missing the *Frankenstein* document.
|
||||
@@ -143,4 +140,4 @@ fn main() -> tantivy::Result<()> {
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,60 +22,60 @@ use tantivy::schema::*;
|
||||
use tantivy::Index;
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// Let's create a temporary directory for the
|
||||
// sake of this example
|
||||
let index_path = TempDir::new("tantivy_facet_example_dir")?;
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
// Let's create a temporary directory for the
|
||||
// sake of this example
|
||||
let index_path = TempDir::new("tantivy_facet_example_dir")?;
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
|
||||
schema_builder.add_text_field("name", TEXT | STORED);
|
||||
schema_builder.add_text_field("name", TEXT | STORED);
|
||||
|
||||
// this is our faceted field
|
||||
schema_builder.add_facet_field("tags");
|
||||
// this is our faceted field
|
||||
schema_builder.add_facet_field("tags");
|
||||
|
||||
let schema = schema_builder.build();
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_dir(&index_path, schema.clone())?;
|
||||
let index = Index::create_in_dir(&index_path, schema.clone())?;
|
||||
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
|
||||
let name = schema.get_field("name").unwrap();
|
||||
let tags = schema.get_field("tags").unwrap();
|
||||
let name = schema.get_field("name").unwrap();
|
||||
let tags = schema.get_field("tags").unwrap();
|
||||
|
||||
// For convenience, tantivy also comes with a macro to
|
||||
// reduce the boilerplate above.
|
||||
index_writer.add_document(doc!(
|
||||
// For convenience, tantivy also comes with a macro to
|
||||
// reduce the boilerplate above.
|
||||
index_writer.add_document(doc!(
|
||||
name => "the ditch",
|
||||
tags => Facet::from("/pools/north")
|
||||
));
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
index_writer.add_document(doc!(
|
||||
name => "little stacey",
|
||||
tags => Facet::from("/pools/south")
|
||||
));
|
||||
|
||||
index_writer.commit()?;
|
||||
index_writer.commit()?;
|
||||
|
||||
index.load_searchers()?;
|
||||
index.load_searchers()?;
|
||||
|
||||
let searcher = index.searcher();
|
||||
let searcher = index.searcher();
|
||||
|
||||
let mut facet_collector = FacetCollector::for_field(tags);
|
||||
facet_collector.add_facet("/pools");
|
||||
let mut facet_collector = FacetCollector::for_field(tags);
|
||||
facet_collector.add_facet("/pools");
|
||||
|
||||
searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
searcher.search(&AllQuery, &mut facet_collector).unwrap();
|
||||
|
||||
let counts = facet_collector.harvest();
|
||||
// This lists all of the facet counts
|
||||
let facets: Vec<(&Facet, u64)> = counts.get("/pools").collect();
|
||||
assert_eq!(
|
||||
facets,
|
||||
vec![
|
||||
(&Facet::from("/pools/north"), 1),
|
||||
(&Facet::from("/pools/south"), 1)
|
||||
]
|
||||
);
|
||||
let counts = facet_collector.harvest();
|
||||
// This lists all of the facet counts
|
||||
let facets: Vec<(&Facet, u64)> = counts.get("/pools").collect();
|
||||
assert_eq!(
|
||||
facets,
|
||||
vec![
|
||||
(&Facet::from("/pools/north"), 1),
|
||||
(&Facet::from("/pools/south"), 1),
|
||||
]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
Ok(())
|
||||
}
|
||||
|
||||
use tempdir::TempDir;
|
||||
|
||||
@@ -7,18 +7,15 @@
|
||||
// the list of documents containing a term, getting
|
||||
// its term frequency, and accessing its positions.
|
||||
|
||||
|
||||
// ---
|
||||
// Importing tantivy...
|
||||
#[macro_use]
|
||||
extern crate tantivy;
|
||||
use tantivy::schema::*;
|
||||
use tantivy::Index;
|
||||
use tantivy::{DocSet, DocId, Postings};
|
||||
use tantivy::{DocId, DocSet, Postings};
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
|
||||
|
||||
// We first create a schema for the sake of the
|
||||
// example. Check the `basic_search` example for more information.
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
@@ -47,7 +44,6 @@ fn main() -> tantivy::Result<()> {
|
||||
// there is actually only one segment here, but let's iterate through the list
|
||||
// anyway)
|
||||
for segment_reader in searcher.segment_readers() {
|
||||
|
||||
// A segment contains different data structure.
|
||||
// Inverted index stands for the combination of
|
||||
// - the term dictionary
|
||||
@@ -58,19 +54,18 @@ fn main() -> tantivy::Result<()> {
|
||||
// Let's go through all docs containing the term `title:the` and access their position
|
||||
let term_the = Term::from_field_text(title, "the");
|
||||
|
||||
|
||||
// This segment posting object is like a cursor over the documents matching the term.
|
||||
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
|
||||
// and positions.
|
||||
//
|
||||
// If you don't need all this information, you may get better performance by decompressing less
|
||||
// information.
|
||||
if let Some(mut segment_postings) = inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions) {
|
||||
|
||||
if let Some(mut segment_postings) =
|
||||
inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)
|
||||
{
|
||||
// this buffer will be used to request for positions
|
||||
let mut positions: Vec<u32> = Vec::with_capacity(100);
|
||||
while segment_postings.advance() {
|
||||
|
||||
// the number of time the term appears in the document.
|
||||
let doc_id: DocId = segment_postings.doc(); //< do not try to access this before calling advance once.
|
||||
|
||||
@@ -98,7 +93,6 @@ fn main() -> tantivy::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// A `Term` is a text token associated with a field.
|
||||
// Let's go through all docs containing the term `title:the` and access their position
|
||||
let term_the = Term::from_field_text(title, "the");
|
||||
@@ -111,7 +105,6 @@ fn main() -> tantivy::Result<()> {
|
||||
// Also, for some VERY specific high performance use case like an OLAP analysis of logs,
|
||||
// you can get better performance by accessing directly the blocks of doc ids.
|
||||
for segment_reader in searcher.segment_readers() {
|
||||
|
||||
// A segment contains different data structure.
|
||||
// Inverted index stands for the combination of
|
||||
// - the term dictionary
|
||||
@@ -124,7 +117,9 @@ fn main() -> tantivy::Result<()> {
|
||||
//
|
||||
// If you don't need all this information, you may get better performance by decompressing less
|
||||
// information.
|
||||
if let Some(mut block_segment_postings) = inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic) {
|
||||
if let Some(mut block_segment_postings) =
|
||||
inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)
|
||||
{
|
||||
while block_segment_postings.advance() {
|
||||
// Once again these docs MAY contains deleted documents as well.
|
||||
let docs = block_segment_postings.docs();
|
||||
@@ -136,4 +131,3 @@ fn main() -> tantivy::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +66,6 @@ fn main() -> tantivy::Result<()> {
|
||||
println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
|
||||
println!("snippet: {}", snippet.to_html());
|
||||
}
|
||||
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -22,59 +22,59 @@ use tantivy::tokenizer::*;
|
||||
use tantivy::Index;
|
||||
|
||||
fn main() -> tantivy::Result<()> {
|
||||
// this example assumes you understand the content in `basic_search`
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
// this example assumes you understand the content in `basic_search`
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
|
||||
// This configures your custom options for how tantivy will
|
||||
// store and process your content in the index; The key
|
||||
// to note is that we are setting the tokenizer to `stoppy`
|
||||
// which will be defined and registered below.
|
||||
let text_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("stoppy")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
// This configures your custom options for how tantivy will
|
||||
// store and process your content in the index; The key
|
||||
// to note is that we are setting the tokenizer to `stoppy`
|
||||
// which will be defined and registered below.
|
||||
let text_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("stoppy")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
|
||||
// Our first field is title.
|
||||
schema_builder.add_text_field("title", text_options);
|
||||
// Our first field is title.
|
||||
schema_builder.add_text_field("title", text_options);
|
||||
|
||||
// Our second field is body.
|
||||
let text_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("stoppy")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("body", text_options);
|
||||
// Our second field is body.
|
||||
let text_field_indexing = TextFieldIndexing::default()
|
||||
.set_tokenizer("stoppy")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
||||
let text_options = TextOptions::default()
|
||||
.set_indexing_options(text_field_indexing)
|
||||
.set_stored();
|
||||
schema_builder.add_text_field("body", text_options);
|
||||
|
||||
let schema = schema_builder.build();
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
|
||||
// This tokenizer lowers all of the text (to help with stop word matching)
|
||||
// then removes all instances of `the` and `and` from the corpus
|
||||
let tokenizer = SimpleTokenizer
|
||||
.filter(LowerCaser)
|
||||
.filter(StopWordFilter::remove(vec![
|
||||
"the".to_string(),
|
||||
"and".to_string(),
|
||||
]));
|
||||
// This tokenizer lowers all of the text (to help with stop word matching)
|
||||
// then removes all instances of `the` and `and` from the corpus
|
||||
let tokenizer = SimpleTokenizer
|
||||
.filter(LowerCaser)
|
||||
.filter(StopWordFilter::remove(vec![
|
||||
"the".to_string(),
|
||||
"and".to_string(),
|
||||
]));
|
||||
|
||||
index.tokenizers().register("stoppy", tokenizer);
|
||||
index.tokenizers().register("stoppy", tokenizer);
|
||||
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
let mut index_writer = index.writer(50_000_000)?;
|
||||
|
||||
let title = schema.get_field("title").unwrap();
|
||||
let body = schema.get_field("body").unwrap();
|
||||
let title = schema.get_field("title").unwrap();
|
||||
let body = schema.get_field("body").unwrap();
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
index_writer.add_document(doc!(
|
||||
title => "The Old Man and the Sea",
|
||||
body => "He was an old man who fished alone in a skiff in the Gulf Stream and \
|
||||
he had gone eighty-four days now without taking a fish."
|
||||
));
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
index_writer.add_document(doc!(
|
||||
title => "Of Mice and Men",
|
||||
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
|
||||
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
|
||||
@@ -86,7 +86,7 @@ fn main() -> tantivy::Result<()> {
|
||||
limbs and branches that arch over the pool"
|
||||
));
|
||||
|
||||
index_writer.add_document(doc!(
|
||||
index_writer.add_document(doc!(
|
||||
title => "Frankenstein",
|
||||
body => "You will rejoice to hear that no disaster has accompanied the commencement of an \
|
||||
enterprise which you have regarded with such evil forebodings. I arrived here \
|
||||
@@ -94,28 +94,28 @@ fn main() -> tantivy::Result<()> {
|
||||
increasing confidence in the success of my undertaking."
|
||||
));
|
||||
|
||||
index_writer.commit()?;
|
||||
index_writer.commit()?;
|
||||
|
||||
index.load_searchers()?;
|
||||
index.load_searchers()?;
|
||||
|
||||
let searcher = index.searcher();
|
||||
let searcher = index.searcher();
|
||||
|
||||
let query_parser = QueryParser::for_index(&index, vec![title, body]);
|
||||
let query_parser = QueryParser::for_index(&index, vec![title, body]);
|
||||
|
||||
// stop words are applied on the query as well.
|
||||
// The following will be equivalent to `title:frankenstein`
|
||||
let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
|
||||
// stop words are applied on the query as well.
|
||||
// The following will be equivalent to `title:frankenstein`
|
||||
let query = query_parser.parse_query("title:\"the Frankenstein\"")?;
|
||||
|
||||
let mut top_collector = TopCollector::with_limit(10);
|
||||
let mut top_collector = TopCollector::with_limit(10);
|
||||
|
||||
searcher.search(&*query, &mut top_collector)?;
|
||||
searcher.search(&*query, &mut top_collector)?;
|
||||
|
||||
let doc_addresses = top_collector.docs();
|
||||
let doc_addresses = top_collector.docs();
|
||||
|
||||
for doc_address in doc_addresses {
|
||||
let retrieved_doc = searcher.doc(&doc_address)?;
|
||||
println!("{}", schema.to_json(&retrieved_doc));
|
||||
}
|
||||
for doc_address in doc_addresses {
|
||||
let retrieved_doc = searcher.doc(&doc_address)?;
|
||||
println!("{}", schema.to_json(&retrieved_doc));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user