Compare commits

...

17 Commits

Author SHA1 Message Date
Paul Masurel
f2b8c030d5 macos ci 2019-09-13 10:16:42 +09:00
fdb-hiroshima
7e08e0047b fix Term documentation (#655)
u64-based fields are actually 4+8=12 bytes long
2019-09-11 18:49:35 +09:00
fdb-hiroshima
1a817f117f fix documentation error (#654)
Union misdocumented as doing an intersection
Union and Intersection can hold more than 2 DocSets
2019-09-11 17:12:08 +09:00
petr-tik
2ec19b21ae Remove unnecessary duplicate methods (#650)
Closes #649

Spotted by @imor
2019-09-09 06:36:04 +09:00
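
For context, a minimal sketch (not part of the diff) of the accessors that remain after #650; the removed helpers duplicated `has_freq` / `has_positions`, which the serializer now uses directly (see the `IndexRecordOption` hunk further down):

```rust
use tantivy::schema::IndexRecordOption;

fn main() {
    // The removed `is_termfreq_enabled` / `is_position_enabled` helpers
    // answered the same questions as these existing accessors.
    let opt = IndexRecordOption::WithFreqsAndPositions;
    assert!(opt.has_freq());
    assert!(opt.has_positions());
}
```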
Raminder Singh
141f5a93f7 Using FnvHashMap for mapping UnorderedTermId to TermOrdinal. Fixes #507 (#647)
* Using FnvHashMap for mapping UnorderedTermId to TermOrdinal. Fixes #507

* Fixed cargo fmt errors
2019-09-07 19:40:21 +09:00
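
A hedged sketch of what the swap amounts to, assuming `UnorderedTermId` and `TermOrdinal` are plain `u64` aliases as in tantivy's source: `FnvHashMap` is the standard `HashMap` with the FNV hasher, which tends to be faster for small integer keys like term ids.

```rust
use fnv::FnvHashMap;

// Assumed aliases, mirroring tantivy's definitions; illustrative only.
type UnorderedTermId = u64;
type TermOrdinal = u64;

// Assign term ordinals to unordered term ids using the FNV-hashed map.
fn build_mapping(ids: &[UnorderedTermId]) -> FnvHashMap<UnorderedTermId, TermOrdinal> {
    let mut mapping = FnvHashMap::default();
    for (ord, &id) in ids.iter().enumerate() {
        mapping.insert(id, ord as TermOrdinal);
    }
    mapping
}
```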
Paul Masurel
df47d55cd2 Occur debug interface (#648) 2019-09-07 15:08:45 +09:00
Raminder Singh
5e579fd6b7 Fixed clippy warning: unneeded return statement (#646) 2019-09-07 10:14:37 +09:00
Paul Masurel
4b9c1dce69 Moving query grammar to a different crate. (#645) 2019-09-05 09:37:28 +09:00
Paul Masurel
d74f71bbef Lighter regex dependency. (#644)
Detail on https://github.com/rust-lang/regex/pull/613
2019-09-04 13:10:12 +09:00
Paul Masurel
5196ca41d8 Small code clean up 2019-09-03 09:22:32 +09:00
dependabot-preview[bot]
4959e06151 Update once_cell requirement from 0.2 to 1.0 (#643)
Updates the requirements on [once_cell](https://github.com/matklad/once_cell) to permit the latest version.
- [Release notes](https://github.com/matklad/once_cell/releases)
- [Changelog](https://github.com/matklad/once_cell/blob/master/CHANGELOG.md)
- [Commits](https://github.com/matklad/once_cell/compare/v0.2.0...v1.0.2)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2019-09-03 07:00:45 +09:00
Paul Masurel
c1635c13f6 RegexQuery performance: make it possible to cache Regexes - remastered by fulmicoton (Closes #639) (#641)
* small docs cleanup

* only compile a regex once per RegexQuery

Building a `Regex` is an expensive operation. Users of `RegexQuery`
need to cache and reuse regexes when searching across multiple fields.

This is the first step towards allowing that: we can store the `Regex`
directly in the `RegexQuery`, instead of the string pattern.

* RegexQuery: account for possible failure in the constructor

When building a regex from a str pattern, we have to account for the
possibility that the pattern is invalid. Before the previous commit, the
failure would happen in the `specialized_weight` method. Now that we
store a compiled `Regex` in `RegexQuery`, `specialized_weight` doesn't
fail anymore, and we can fail early while constructing `RegexQuery` if
the pattern is invalid.

This is a breaking change for users of `RegexQuery::new`.

* add RegexQuery::from_regex method

This builds a `RegexQuery` from an already compiled `Regex`. The use of
`Into<Arc<Regex>>` is to allow the caller to either simply pass a
`Regex`, or an `Arc<Regex>`, in case it needs to be cached and shared on
the caller's side.

* Using an Arc in AutomatonWeight

Closes #639
2019-08-22 16:14:01 +09:00
Paul Masurel
135e0ea2e9 Expose new segment meta from Index (#637) 2019-08-19 10:39:15 +09:00
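
A minimal sketch of the accessor added in #637, assuming an empty in-RAM index just for illustration; the method and its semantics are taken from the index.rs hunk below:

```rust
use tantivy::schema::Schema;
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let index = Index::create_in_ram(Schema::builder().build());
    // Lists every SegmentMeta tracked by the inventory, including segments
    // that are still being built or merged.
    for segment_meta in index.list_all_segment_metas() {
        println!("{:?}: {} docs", segment_meta.id(), segment_meta.max_doc());
    }
    Ok(())
}
```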
Paul Masurel
f283bfd7ab Added segmentid_from_string (#636) 2019-08-19 10:37:30 +09:00
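
A small sketch of the new parsing helper, mirroring the unit test added in the segment_id.rs hunk below and assuming `SegmentId` is re-exported at the crate root:

```rust
use tantivy::SegmentId;

fn main() {
    // Round-trip the full uuid string; the value comes from the diff's own test.
    let uuid = "a5c4dfcbdfe645089129e308e26d5523";
    let segment_id = SegmentId::from_uuid_string(uuid).unwrap();
    assert_eq!(segment_id.uuid_string(), uuid);
    assert_eq!(segment_id.short_uuid_string(), "a5c4dfcb");
}
```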
Joshua Dutton
9f74786db2 Update import statements in examples, doctests (#633)
Update import statements to edition 2018, including removing
`extern crate` and  `#[macro_use]`. Alphabetize the statements.
2019-08-19 07:26:35 +09:00
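
An illustrative before/after of the edition-2018 import style this commit applies across the examples; the schema and document here are placeholders, not from the diff:

```rust
// Edition 2015 style (removed): `#[macro_use] extern crate tantivy;`
// Edition 2018 style: the `doc!` macro is imported like any other item.
use tantivy::doc;
use tantivy::schema::{Schema, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();
    let document = doc!(title => "Of Mice and Men");
    println!("{}", schema.to_json(&document));
}
```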
Joshua Dutton
32e5d7a0c7 Fix trait object in doctest (#635) 2019-08-19 07:25:00 +09:00
Joshua Dutton
84c615cff1 Fixing typos (#634) 2019-08-19 07:24:05 +09:00
57 changed files with 348 additions and 295 deletions


@@ -41,8 +41,8 @@ matrix:
   - env: TARGET=x86_64-unknown-linux-gnu CODECOV=1 #UPLOAD_DOCS=1
 # - env: TARGET=x86_64-unknown-linux-musl CODECOV=1
 # OSX
-#- env: TARGET=x86_64-apple-darwin
-#  os: osx
+  - env: TARGET=x86_64-apple-darwin
+    os: osx
 before_install:
   - set -e


@@ -7,10 +7,13 @@ Tantivy 0.11.0
 - Better handling of whitespaces.
 - Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types eg. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
 - API change around `Box<BoxableTokenizer>`. See detail in #629
+- Avoid rebuilding Regex automaton whenever a regex query is reused. #630 (@brainlock)
 ## How to update?
-`Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
+- `Box<dyn BoxableTokenizer>` has been replaced by a `BoxedTokenizer` struct.
+- Regex are now compiled when the `RegexQuery` instance is built. As a result, it can now return
+  an error and handling the `Result` is required.
 Tantivy 0.10.1
 =====================


@@ -15,8 +15,8 @@ edition = "2018"
 [dependencies]
 base64 = "0.10.0"
 byteorder = "1.0"
-once_cell = "0.2"
-regex = "1.0"
+once_cell = "1.0"
+regex = {version = "1.3.0", default-features = false, features = ["std"]}
 tantivy-fst = "0.1"
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1.20", optional=true}
@@ -24,7 +24,6 @@ snap = {version="0.2"}
 atomicwrites = {version="0.2.2", optional=true}
 tempfile = "3.0"
 log = "0.4"
-combine = ">=3.6.0,<4.0.0"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = "1.0"
@@ -42,6 +41,7 @@ owning_ref = "0.4"
 stable_deref_trait = "1.0.0"
 rust-stemmers = "1.1"
 downcast-rs = { version="1.0" }
+tantivy-query-grammar = { path="./query-grammar" }
 bitpacking = {version="0.8", default-features = false, features=["bitpacker4x"]}
 census = "0.2"
 fnv = "1.0.6"
@@ -80,10 +80,14 @@ failpoints = ["fail/failpoints"]
 unstable = [] # useful for benches.
 wasm-bindgen = ["uuid/wasm-bindgen"]
+
+[workspace]
+members = ["query-grammar"]
+
 [badges]
 travis-ci = { repository = "tantivy-search/tantivy" }
 [dev-dependencies.fail]
 version = "0.3"
 features = ["failpoints"]
 # Following the "fail" crate best practises, we isolate

Makefile (new file)

@@ -0,0 +1,3 @@
test:
echo "Run test only... No examples."
cargo test --tests --lib


@@ -7,7 +7,7 @@ set -ex
 main() {
     if [ ! -z $CODECOV ]; then
         echo "Codecov"
-        cargo build --verbose && cargo coverage --verbose && bash <(curl -s https://codecov.io/bash) -s target/kcov
+        cargo build --verbose && cargo coverage --verbose --all && bash <(curl -s https://codecov.io/bash) -s target/kcov
     else
         echo "Build"
         cross build --target $TARGET
@@ -15,7 +15,8 @@ main() {
             return
         fi
         echo "Test"
-        cross test --target $TARGET --no-default-features --features mmap -- --test-threads 1
+        cross test --target $TARGET --no-default-features --features mmap
+        cross test --target $TARGET --no-default-features --features mmap query-grammar
     fi
     for example in $(ls examples/*.rs)
     do


@@ -5,20 +5,17 @@
// //
// We will : // We will :
// - define our schema // - define our schema
// = create an index in a directory // - create an index in a directory
// - index few documents in our index // - index a few documents into our index
// - search for the best document matchings "sea whale" // - search for the best document matching a basic query
// - retrieve the best document original content. // - retrieve the best document's original content.
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::{doc, Index, ReloadPolicy};
use tantivy::ReloadPolicy;
use tempfile::TempDir; use tempfile::TempDir;
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
@@ -33,7 +30,7 @@ fn main() -> tantivy::Result<()> {
// and for each field, its type and "the way it should // and for each field, its type and "the way it should
// be indexed". // be indexed".
// first we need to define a schema ... // First we need to define a schema ...
let mut schema_builder = Schema::builder(); let mut schema_builder = Schema::builder();
// Our first field is title. // Our first field is title.
@@ -48,7 +45,7 @@ fn main() -> tantivy::Result<()> {
// //
// `STORED` means that the field will also be saved // `STORED` means that the field will also be saved
// in a compressed, row-oriented key-value store. // in a compressed, row-oriented key-value store.
// This store is useful to reconstruct the // This store is useful for reconstructing the
// documents that were selected during the search phase. // documents that were selected during the search phase.
schema_builder.add_text_field("title", TEXT | STORED); schema_builder.add_text_field("title", TEXT | STORED);
@@ -57,8 +54,7 @@ fn main() -> tantivy::Result<()> {
// need to be able to be able to retrieve it // need to be able to be able to retrieve it
// for our application. // for our application.
// //
// We can make our index lighter and // We can make our index lighter by omitting the `STORED` flag.
// by omitting `STORED` flag.
schema_builder.add_text_field("body", TEXT); schema_builder.add_text_field("body", TEXT);
let schema = schema_builder.build(); let schema = schema_builder.build();
@@ -71,7 +67,7 @@ fn main() -> tantivy::Result<()> {
// with our schema in the directory. // with our schema in the directory.
let index = Index::create_in_dir(&index_path, schema.clone())?; let index = Index::create_in_dir(&index_path, schema.clone())?;
// To insert document we need an index writer. // To insert a document we will need an index writer.
// There must be only one writer at a time. // There must be only one writer at a time.
// This single `IndexWriter` is already // This single `IndexWriter` is already
// multithreaded. // multithreaded.
@@ -149,8 +145,8 @@ fn main() -> tantivy::Result<()> {
// At this point our documents are not searchable. // At this point our documents are not searchable.
// //
// //
// We need to call .commit() explicitly to force the // We need to call `.commit()` explicitly to force the
// index_writer to finish processing the documents in the queue, // `index_writer` to finish processing the documents in the queue,
// flush the current index to the disk, and advertise // flush the current index to the disk, and advertise
// the existence of new documents. // the existence of new documents.
// //
@@ -162,14 +158,14 @@ fn main() -> tantivy::Result<()> {
// persistently indexed. // persistently indexed.
// //
// In the scenario of a crash or a power failure, // In the scenario of a crash or a power failure,
// tantivy behaves as if has rolled back to its last // tantivy behaves as if it has rolled back to its last
// commit. // commit.
// # Searching // # Searching
// //
// ### Searcher // ### Searcher
// //
// A reader is required to get search the index. // A reader is required first in order to search an index.
// It acts as a `Searcher` pool that reloads itself, // It acts as a `Searcher` pool that reloads itself,
// depending on a `ReloadPolicy`. // depending on a `ReloadPolicy`.
// //
@@ -185,7 +181,7 @@ fn main() -> tantivy::Result<()> {
// We now need to acquire a searcher. // We now need to acquire a searcher.
// //
// A searcher points to snapshotted, immutable version of the index. // A searcher points to a snapshotted, immutable version of the index.
// //
// Some search experience might require more than // Some search experience might require more than
// one query. Using the same searcher ensures that all of these queries will run on the // one query. Using the same searcher ensures that all of these queries will run on the
@@ -205,7 +201,7 @@ fn main() -> tantivy::Result<()> {
// in both title and body. // in both title and body.
let query_parser = QueryParser::for_index(&index, vec![title, body]); let query_parser = QueryParser::for_index(&index, vec![title, body]);
// QueryParser may fail if the query is not in the right // `QueryParser` may fail if the query is not in the right
// format. For user facing applications, this can be a problem. // format. For user facing applications, this can be a problem.
// A ticket has been opened regarding this problem. // A ticket has been opened regarding this problem.
let query = query_parser.parse_query("sea whale")?; let query = query_parser.parse_query("sea whale")?;
@@ -221,7 +217,7 @@ fn main() -> tantivy::Result<()> {
// //
// We are not interested in all of the documents but // We are not interested in all of the documents but
// only in the top 10. Keeping track of our top 10 best documents // only in the top 10. Keeping track of our top 10 best documents
// is the role of the TopDocs. // is the role of the `TopDocs` collector.
// We can now perform our query. // We can now perform our query.
let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;


@@ -9,15 +9,12 @@
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::{Collector, SegmentCollector}; use tantivy::collector::{Collector, SegmentCollector};
use tantivy::fastfield::FastFieldReader; use tantivy::fastfield::FastFieldReader;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::Field; use tantivy::schema::Field;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT}; use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
use tantivy::SegmentReader; use tantivy::{doc, Index, SegmentReader, TantivyError};
use tantivy::{Index, TantivyError};
#[derive(Default)] #[derive(Default)]
struct Stats { struct Stats {


@@ -2,14 +2,11 @@
// //
// In this example, we'll see how to define a tokenizer pipeline // In this example, we'll see how to define a tokenizer pipeline
// by aligning a bunch of `TokenFilter`. // by aligning a bunch of `TokenFilter`.
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::tokenizer::NgramTokenizer; use tantivy::tokenizer::NgramTokenizer;
use tantivy::Index; use tantivy::{doc, Index};
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
// # Defining the schema // # Defining the schema


@@ -8,13 +8,10 @@
// //
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::query::TermQuery; use tantivy::query::TermQuery;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::{doc, Index, IndexReader};
use tantivy::IndexReader;
// A simple helper function to fetch a single document // A simple helper function to fetch a single document
// given its id from our index. // given its id from our index.


@@ -12,12 +12,10 @@
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::FacetCollector; use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery; use tantivy::query::AllQuery;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::{doc, Index};
use tempfile::TempDir; use tempfile::TempDir;
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {


@@ -2,14 +2,10 @@
// //
// Below is an example of creating an indexed integer field in your schema // Below is an example of creating an indexed integer field in your schema
// You can use RangeQuery to get a Count of all occurrences in a given range. // You can use RangeQuery to get a Count of all occurrences in a given range.
#[macro_use]
extern crate tantivy;
use tantivy::collector::Count; use tantivy::collector::Count;
use tantivy::query::RangeQuery; use tantivy::query::RangeQuery;
use tantivy::schema::{Schema, INDEXED}; use tantivy::schema::{Schema, INDEXED};
use tantivy::Index; use tantivy::{doc, Index, Result};
use tantivy::Result;
fn run() -> Result<()> { fn run() -> Result<()> {
// For the sake of simplicity, this schema will only have 1 field // For the sake of simplicity, this schema will only have 1 field


@@ -9,11 +9,8 @@
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::{doc, DocId, DocSet, Index, Postings};
use tantivy::{DocId, DocSet, Postings};
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
// We first create a schema for the sake of the // We first create a schema for the sake of the


@@ -25,14 +25,11 @@
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use std::thread; use std::thread;
use std::time::Duration; use std::time::Duration;
use tantivy::schema::{Schema, STORED, TEXT}; use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::Opstamp; use tantivy::{doc, Index, IndexWriter, Opstamp};
use tantivy::{Index, IndexWriter};
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
// # Defining the schema // # Defining the schema
@@ -49,10 +46,9 @@ fn main() -> tantivy::Result<()> {
thread::spawn(move || { thread::spawn(move || {
// we index 100 times the document... for the sake of the example. // we index 100 times the document... for the sake of the example.
for i in 0..100 { for i in 0..100 {
let opstamp = { let opstamp = index_writer_clone_1
// A read lock is sufficient here. .read().unwrap() //< A read lock is sufficient here.
let index_writer_rlock = index_writer_clone_1.read().unwrap(); .add_document(
index_writer_rlock.add_document(
doc!( doc!(
title => "Of Mice and Men", title => "Of Mice and Men",
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \ body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
@@ -63,8 +59,7 @@ fn main() -> tantivy::Result<()> {
fresh and green with every spring, carrying in their lower leaf junctures the \ fresh and green with every spring, carrying in their lower leaf junctures the \
debris of the winters flooding; and sycamores with mottled, white, recumbent \ debris of the winters flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool" limbs and branches that arch over the pool"
)) ));
};
println!("add doc {} from thread 1 - opstamp {}", i, opstamp); println!("add doc {} from thread 1 - opstamp {}", i, opstamp);
thread::sleep(Duration::from_millis(20)); thread::sleep(Duration::from_millis(20));
} }


@@ -7,13 +7,10 @@
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::Index; use tantivy::{doc, Index, Snippet, SnippetGenerator};
use tantivy::{Snippet, SnippetGenerator};
use tempfile::TempDir; use tempfile::TempDir;
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {


@@ -11,13 +11,11 @@
// --- // ---
// Importing tantivy... // Importing tantivy...
#[macro_use]
extern crate tantivy;
use tantivy::collector::TopDocs; use tantivy::collector::TopDocs;
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use tantivy::schema::*; use tantivy::schema::*;
use tantivy::tokenizer::*; use tantivy::tokenizer::*;
use tantivy::Index; use tantivy::{doc, Index};
fn main() -> tantivy::Result<()> { fn main() -> tantivy::Result<()> {
// this example assumes you understand the content in `basic_search` // this example assumes you understand the content in `basic_search`

query-grammar/Cargo.toml (new file)

@@ -0,0 +1,16 @@
[package]
name = "tantivy-query-grammar"
version = "0.11.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
description = """Search engine library"""
documentation = "https://tantivy-search.github.io/tantivy/tantivy/index.html"
homepage = "https://github.com/tantivy-search/tantivy"
repository = "https://github.com/tantivy-search/tantivy"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
edition = "2018"
[dependencies]
combine = ">=3.6.0,<4.0.0"

query-grammar/src/lib.rs (new file)

@@ -0,0 +1,17 @@
#![recursion_limit = "100"]
mod occur;
mod query_grammar;
mod user_input_ast;
use combine::parser::Parser;
pub use crate::occur::Occur;
use crate::query_grammar::parse_to_ast;
pub use crate::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
pub struct Error;
pub fn parse_query(query: &str) -> Result<UserInputAST, Error> {
let (user_input_ast, _remaining) = parse_to_ast().parse(query).map_err(|_| Error)?;
Ok(user_input_ast)
}


@@ -1,3 +1,6 @@
+use std::fmt;
+use std::fmt::Write;
+
 /// Defines whether a term in a query must be present,
 /// should be present or must not be present.
 #[derive(Debug, Clone, Hash, Copy, Eq, PartialEq)]
@@ -18,32 +21,38 @@ impl Occur {
     /// - `Should` => '?',
     /// - `Must` => '+'
     /// - `Not` => '-'
-    pub fn to_char(self) -> char {
+    fn to_char(self) -> char {
         match self {
             Occur::Should => '?',
             Occur::Must => '+',
             Occur::MustNot => '-',
         }
     }
-}
-
-/// Compose two occur values.
-pub fn compose_occur(left: Occur, right: Occur) -> Occur {
-    match left {
-        Occur::Should => right,
-        Occur::Must => {
-            if right == Occur::MustNot {
-                Occur::MustNot
-            } else {
-                Occur::Must
-            }
-        }
-        Occur::MustNot => {
-            if right == Occur::MustNot {
-                Occur::Must
-            } else {
-                Occur::MustNot
-            }
-        }
-    }
-}
+
+    /// Compose two occur values.
+    pub fn compose(left: Occur, right: Occur) -> Occur {
+        match left {
+            Occur::Should => right,
+            Occur::Must => {
+                if right == Occur::MustNot {
+                    Occur::MustNot
+                } else {
+                    Occur::Must
+                }
+            }
+            Occur::MustNot => {
+                if right == Occur::MustNot {
+                    Occur::Must
+                } else {
+                    Occur::MustNot
+                }
+            }
+        }
+    }
+}
+
+impl fmt::Display for Occur {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_char(self.to_char())
+    }
+}


@@ -1,6 +1,5 @@
 use super::user_input_ast::*;
-use crate::query::occur::Occur;
-use crate::query::query_parser::user_input_ast::UserInputBound;
+use crate::Occur;
 use combine::char::*;
 use combine::error::StreamError;
 use combine::stream::StreamErrorFor;


@@ -1,7 +1,7 @@
 use std::fmt;
 use std::fmt::{Debug, Formatter};
-use crate::query::Occur;
+use crate::Occur;
 #[derive(PartialEq)]
 pub enum UserInputLeaf {
@@ -151,7 +151,7 @@ impl fmt::Debug for UserInputAST {
                 Ok(())
             }
             UserInputAST::Unary(ref occur, ref subquery) => {
-                write!(formatter, "{}({:?})", occur.to_char(), subquery)
+                write!(formatter, "{}({:?})", occur, subquery)
             }
             UserInputAST::Leaf(ref subquery) => write!(formatter, "{:?}", subquery),
         }


@@ -10,12 +10,10 @@ use crate::SegmentReader;
/// documents match the query. /// documents match the query.
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{Index, Result};
/// use tantivy::collector::Count; /// use tantivy::collector::Count;
/// use tantivy::query::QueryParser; /// use tantivy::query::QueryParser;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, Index, Result};
/// ///
/// # fn main() { example().unwrap(); } /// # fn main() { example().unwrap(); }
/// fn example() -> Result<()> { /// fn example() -> Result<()> {


@@ -81,12 +81,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// ///
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::schema::{Facet, Schema, TEXT};
/// use tantivy::{Index, Result};
/// use tantivy::collector::FacetCollector; /// use tantivy::collector::FacetCollector;
/// use tantivy::query::AllQuery; /// use tantivy::query::AllQuery;
/// use tantivy::schema::{Facet, Schema, TEXT};
/// use tantivy::{doc, Index, Result};
/// ///
/// # fn main() { example().unwrap(); } /// # fn main() { example().unwrap(); }
/// fn example() -> Result<()> { /// fn example() -> Result<()> {


@@ -35,7 +35,6 @@ The resulting `Fruit` will then be a typed tuple with each collector's original
in their respective position. in their respective position.
```rust ```rust
# extern crate tantivy;
# use tantivy::schema::*; # use tantivy::schema::*;
# use tantivy::*; # use tantivy::*;
# use tantivy::query::*; # use tantivy::query::*;


@@ -105,12 +105,10 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
/// [Combining several collectors section of the collector documentation](./index.html#combining-several-collectors). /// [Combining several collectors section of the collector documentation](./index.html#combining-several-collectors).
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{Index, Result};
/// use tantivy::collector::{Count, TopDocs, MultiCollector}; /// use tantivy::collector::{Count, TopDocs, MultiCollector};
/// use tantivy::query::QueryParser; /// use tantivy::query::QueryParser;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, Index, Result};
/// ///
/// # fn main() { example().unwrap(); } /// # fn main() { example().unwrap(); }
/// fn example() -> Result<()> { /// fn example() -> Result<()> {


@@ -23,13 +23,10 @@ use std::fmt;
/// is `O(n log K)`. /// is `O(n log K)`.
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::DocAddress;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{Index, Result};
/// use tantivy::collector::TopDocs; /// use tantivy::collector::TopDocs;
/// use tantivy::query::QueryParser; /// use tantivy::query::QueryParser;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, DocAddress, Index, Result};
/// ///
/// # fn main() { example().unwrap(); } /// # fn main() { example().unwrap(); }
/// fn example() -> Result<()> { /// fn example() -> Result<()> {
@@ -87,10 +84,8 @@ impl TopDocs {
/// Set top-K to rank documents by a given fast field. /// Set top-K to rank documents by a given fast field.
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// # use tantivy::schema::{Schema, FAST, TEXT}; /// # use tantivy::schema::{Schema, FAST, TEXT};
/// # use tantivy::{Index, Result, DocAddress}; /// # use tantivy::{doc, Index, Result, DocAddress};
/// # use tantivy::query::{Query, QueryParser}; /// # use tantivy::query::{Query, QueryParser};
/// use tantivy::Searcher; /// use tantivy::Searcher;
/// use tantivy::collector::TopDocs; /// use tantivy::collector::TopDocs;
@@ -128,7 +123,7 @@ impl TopDocs {
/// /// /// ///
/// /// `field` is required to be a FAST field. /// /// `field` is required to be a FAST field.
/// fn docs_sorted_by_rating(searcher: &Searcher, /// fn docs_sorted_by_rating(searcher: &Searcher,
/// query: &Query, /// query: &dyn Query,
/// sort_by_field: Field) /// sort_by_field: Field)
/// -> Result<Vec<(u64, DocAddress)>> { /// -> Result<Vec<(u64, DocAddress)>> {
/// ///
@@ -197,10 +192,8 @@ impl TopDocs {
/// learning-to-rank model over various features /// learning-to-rank model over various features
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// # use tantivy::schema::{Schema, FAST, TEXT}; /// # use tantivy::schema::{Schema, FAST, TEXT};
/// # use tantivy::{Index, DocAddress, DocId, Score}; /// # use tantivy::{doc, Index, DocAddress, DocId, Score};
/// # use tantivy::query::QueryParser; /// # use tantivy::query::QueryParser;
/// use tantivy::SegmentReader; /// use tantivy::SegmentReader;
/// use tantivy::collector::TopDocs; /// use tantivy::collector::TopDocs;
@@ -302,10 +295,8 @@ impl TopDocs {
/// # Example /// # Example
/// ///
/// ```rust /// ```rust
/// # #[macro_use]
/// # extern crate tantivy;
/// # use tantivy::schema::{Schema, FAST, TEXT}; /// # use tantivy::schema::{Schema, FAST, TEXT};
/// # use tantivy::{Index, DocAddress, DocId}; /// # use tantivy::{doc, Index, DocAddress, DocId};
/// # use tantivy::query::QueryParser; /// # use tantivy::query::QueryParser;
/// use tantivy::SegmentReader; /// use tantivy::SegmentReader;
/// use tantivy::collector::TopDocs; /// use tantivy::collector::TopDocs;


@@ -216,8 +216,22 @@ impl Index {
         Index::open(mmap_directory)
     }
 
-    pub(crate) fn inventory(&self) -> &SegmentMetaInventory {
-        &self.inventory
+    /// Returns the list of the segment metas tracked by the index.
+    ///
+    /// Such segments can of course be part of the index,
+    /// but also they could be segments being currently built or in the middle of a merge
+    /// operation.
+    pub fn list_all_segment_metas(&self) -> Vec<SegmentMeta> {
+        self.inventory.all()
+    }
+
+    /// Creates a new segment_meta (Advanced user only).
+    ///
+    /// As long as the `SegmentMeta` lives, the files associated with the
+    /// `SegmentMeta` are guaranteed to not be garbage collected, regardless of
+    /// whether the segment is recorded as part of the index or not.
+    pub fn new_segment_meta(&self, segment_id: SegmentId, max_doc: u32) -> SegmentMeta {
+        self.inventory.new_segment_meta(segment_id, max_doc)
     }
 
     /// Open the index using the provided directory


@@ -30,7 +30,6 @@ impl SegmentMetaInventory {
             .collect::<Vec<_>>()
     }
 
-    #[doc(hidden)]
     pub fn new_segment_meta(&self, segment_id: SegmentId, max_doc: u32) -> SegmentMeta {
         let inner = InnerSegmentMeta {
             segment_id,


@@ -4,6 +4,8 @@ use uuid::Uuid;
#[cfg(test)] #[cfg(test)]
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::error::Error;
use std::str::FromStr;
#[cfg(test)] #[cfg(test)]
use std::sync::atomic; use std::sync::atomic;
@@ -52,15 +54,51 @@ impl SegmentId {
/// and the rest is random. /// and the rest is random.
/// ///
/// Picking the first 8 chars is ok to identify /// Picking the first 8 chars is ok to identify
/// segments in a display message. /// segments in a display message (e.g. a5c4dfcb).
pub fn short_uuid_string(&self) -> String { pub fn short_uuid_string(&self) -> String {
(&self.0.to_simple_ref().to_string()[..8]).to_string() (&self.0.to_simple_ref().to_string()[..8]).to_string()
} }
/// Returns a segment uuid string. /// Returns a segment uuid string.
///
/// It consists in 32 lowercase hexadecimal chars
/// (e.g. a5c4dfcbdfe645089129e308e26d5523)
pub fn uuid_string(&self) -> String { pub fn uuid_string(&self) -> String {
self.0.to_simple_ref().to_string() self.0.to_simple_ref().to_string()
} }
/// Build a `SegmentId` string from the full uuid string.
///
/// E.g. "a5c4dfcbdfe645089129e308e26d5523"
pub fn from_uuid_string(uuid_string: &str) -> Result<SegmentId, SegmentIdParseError> {
FromStr::from_str(uuid_string)
}
}
/// Error type used when parsing a `SegmentId` from a string fails.
pub struct SegmentIdParseError(uuid::parser::ParseError);
impl Error for SegmentIdParseError {}
impl fmt::Debug for SegmentIdParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
impl fmt::Display for SegmentIdParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
impl FromStr for SegmentId {
type Err = SegmentIdParseError;
fn from_str(uuid_string: &str) -> Result<Self, SegmentIdParseError> {
let uuid = Uuid::parse_str(uuid_string).map_err(SegmentIdParseError)?;
Ok(SegmentId(uuid))
}
} }
impl fmt::Debug for SegmentId { impl fmt::Debug for SegmentId {
@@ -80,3 +118,18 @@ impl Ord for SegmentId {
self.0.as_bytes().cmp(other.0.as_bytes()) self.0.as_bytes().cmp(other.0.as_bytes())
} }
} }
#[cfg(test)]
mod tests {
use super::SegmentId;
#[test]
fn test_to_uuid_string() {
let full_uuid = "a5c4dfcbdfe645089129e308e26d5523";
let segment_id = SegmentId::from_uuid_string(full_uuid).unwrap();
assert_eq!(segment_id.uuid_string(), full_uuid);
assert_eq!(segment_id.short_uuid_string(), "a5c4dfcb");
// one extra char
assert!(SegmentId::from_uuid_string("a5c4dfcbdfe645089129e308e26d5523b").is_err());
}
}


@@ -265,7 +265,7 @@ impl MmapDirectoryInner {
             }
         }
         if let Some(watch_wrapper) = self.watcher.write().unwrap().as_mut() {
-            return Ok(watch_wrapper.watch(watch_callback));
+            Ok(watch_wrapper.watch(watch_callback))
         } else {
             unreachable!("At this point, watch wrapper is supposed to be initialized");
         }


@@ -5,8 +5,8 @@ use crate::postings::UnorderedTermId;
 use crate::schema::{Document, Field};
 use crate::termdict::TermOrdinal;
 use crate::DocId;
+use fnv::FnvHashMap;
 use itertools::Itertools;
-use std::collections::HashMap;
 use std::io;
/// Writer for multi-valued (as in, more than one value per document) /// Writer for multi-valued (as in, more than one value per document)
@@ -102,7 +102,7 @@ impl MultiValueIntFastFieldWriter {
     pub fn serialize(
         &self,
         serializer: &mut FastFieldSerializer,
-        mapping_opt: Option<&HashMap<UnorderedTermId, TermOrdinal>>,
+        mapping_opt: Option<&FnvHashMap<UnorderedTermId, TermOrdinal>>,
     ) -> io::Result<()> {
         {
             // writing the offset index


@@ -6,6 +6,7 @@ use crate::fastfield::{BytesFastFieldWriter, FastFieldSerializer};
 use crate::postings::UnorderedTermId;
 use crate::schema::{Cardinality, Document, Field, FieldType, Schema};
 use crate::termdict::TermOrdinal;
+use fnv::FnvHashMap;
 use std::collections::HashMap;
 use std::io;
@@ -116,7 +117,7 @@ impl FastFieldsWriter {
     pub fn serialize(
         &self,
         serializer: &mut FastFieldSerializer,
-        mapping: &HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>,
+        mapping: &HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>,
     ) -> io::Result<()> {
         for field_writer in &self.single_value_writers {
             field_writer.serialize(serializer)?;


@@ -209,10 +209,7 @@ fn index_documents(
     assert!(num_docs > 0);
     let doc_opstamps: Vec<Opstamp> = segment_writer.finalize()?;
-    let segment_meta = segment
-        .index()
-        .inventory()
-        .new_segment_meta(segment_id, num_docs);
+    let segment_meta = segment.index().new_segment_meta(segment_id, num_docs);
     let last_docstamp: Opstamp = *(doc_opstamps.last().unwrap());
@@ -450,12 +447,10 @@ impl IndexWriter {
/// by clearing and resubmitting necessary documents /// by clearing and resubmitting necessary documents
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::query::QueryParser;
/// use tantivy::collector::TopDocs; /// use tantivy::collector::TopDocs;
/// use tantivy::query::QueryParser;
/// use tantivy::schema::*; /// use tantivy::schema::*;
/// use tantivy::Index; /// use tantivy::{doc, Index};
/// ///
/// fn main() -> tantivy::Result<()> { /// fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder(); /// let mut schema_builder = Schema::builder();


@@ -126,9 +126,7 @@ fn perform_merge(
     let num_docs = merger.write(segment_serializer)?;
-    let segment_meta = index
-        .inventory()
-        .new_segment_meta(merged_segment.id(), num_docs);
+    let segment_meta = index.new_segment_meta(merged_segment.id(), num_docs);
     let after_merge_segment_entry = SegmentEntry::new(segment_meta.clone(), delete_cursor, None);
     Ok(after_merge_segment_entry)
@@ -282,7 +280,7 @@ impl SegmentUpdater {
     fn list_files(&self) -> HashSet<PathBuf> {
         let mut files = HashSet::new();
         files.insert(META_FILEPATH.to_path_buf());
-        for segment_meta in self.0.index.inventory().all() {
+        for segment_meta in self.0.index.list_all_segment_metas() {
             files.extend(segment_meta.list_files());
         }
         files


@@ -1,5 +1,4 @@
#![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")] #![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
#![recursion_limit = "100"]
#![cfg_attr(all(feature = "unstable", test), feature(test))] #![cfg_attr(all(feature = "unstable", test), feature(test))]
#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))] #![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))]
#![doc(test(attr(allow(unused_variables), deny(warnings))))] #![doc(test(attr(allow(unused_variables), deny(warnings))))]
@@ -11,21 +10,12 @@
//! Think `Lucene`, but in Rust. //! Think `Lucene`, but in Rust.
//! //!
//! ```rust //! ```rust
//! # extern crate tempfile;
//! #
//! #[macro_use]
//! extern crate tantivy;
//!
//! // ...
//!
//! # use std::path::Path; //! # use std::path::Path;
//! # use tempfile::TempDir; //! # use tempfile::TempDir;
//! # use tantivy::Index;
//! # use tantivy::schema::*;
//! # use tantivy::{Score, DocAddress};
//! # use tantivy::collector::TopDocs; //! # use tantivy::collector::TopDocs;
//! # use tantivy::query::QueryParser; //! # use tantivy::query::QueryParser;
//! # use tantivy::schema::*;
//! # use tantivy::{doc, DocAddress, Index, Score};
//! # //! #
//! # fn main() { //! # fn main() {
//! # // Let's create a temporary directory for the //! # // Let's create a temporary directory for the


@@ -22,11 +22,9 @@
/// ///
/// # Example /// # Example
/// ///
/// ``` /// ```rust
/// #[macro_use]
/// extern crate tantivy;
///
/// use tantivy::schema::{Schema, TEXT, FAST}; /// use tantivy::schema::{Schema, TEXT, FAST};
/// use tantivy::doc;
/// ///
/// //... /// //...
/// ///


@@ -12,6 +12,7 @@ use crate::tokenizer::TokenStream;
use crate::tokenizer::{Token, MAX_TOKEN_LEN}; use crate::tokenizer::{Token, MAX_TOKEN_LEN};
use crate::DocId; use crate::DocId;
use crate::Result; use crate::Result;
use fnv::FnvHashMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::io; use std::io;
use std::marker::PhantomData; use std::marker::PhantomData;
@@ -127,12 +128,12 @@ impl MultiFieldPostingsWriter {
pub fn serialize( pub fn serialize(
&self, &self,
serializer: &mut InvertedIndexSerializer, serializer: &mut InvertedIndexSerializer,
) -> Result<HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>>> { ) -> Result<HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>> {
let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> = let mut term_offsets: Vec<(&[u8], Addr, UnorderedTermId)> =
self.term_index.iter().collect(); self.term_index.iter().collect();
term_offsets.sort_unstable_by_key(|&(k, _, _)| k); term_offsets.sort_unstable_by_key(|&(k, _, _)| k);
let mut unordered_term_mappings: HashMap<Field, HashMap<UnorderedTermId, TermOrdinal>> = let mut unordered_term_mappings: HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>> =
HashMap::new(); HashMap::new();
let field_offsets = make_field_partition(&term_offsets); let field_offsets = make_field_partition(&term_offsets);
@@ -147,7 +148,7 @@ impl MultiFieldPostingsWriter {
let unordered_term_ids = term_offsets[start..stop] let unordered_term_ids = term_offsets[start..stop]
.iter() .iter()
.map(|&(_, _, bucket)| bucket); .map(|&(_, _, bucket)| bucket);
let mapping: HashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids let mapping: FnvHashMap<UnorderedTermId, TermOrdinal> = unordered_term_ids
.enumerate() .enumerate()
.map(|(term_ord, unord_term_id)| { .map(|(term_ord, unord_term_id)| {
(unord_term_id as UnorderedTermId, term_ord as TermOrdinal) (unord_term_id as UnorderedTermId, term_ord as TermOrdinal)


@@ -141,10 +141,7 @@ impl<'a> FieldSerializer<'a> {
             FieldType::Str(ref text_options) => {
                 if let Some(text_indexing_options) = text_options.get_indexing_options() {
                     let index_option = text_indexing_options.index_option();
-                    (
-                        index_option.is_termfreq_enabled(),
-                        index_option.is_position_enabled(),
-                    )
+                    (index_option.has_freq(), index_option.has_positions())
                 } else {
                     (false, false)
                 }


@@ -8,15 +8,13 @@ use crate::termdict::{TermDictionary, TermStreamer};
 use crate::DocId;
 use crate::TantivyError;
 use crate::{Result, SkipResult};
+use std::sync::Arc;
 use tantivy_fst::Automaton;
 
 /// A weight struct for Fuzzy Term and Regex Queries
-pub struct AutomatonWeight<A>
-where
-    A: Automaton + Send + Sync + 'static,
-{
+pub struct AutomatonWeight<A> {
     field: Field,
-    automaton: A,
+    automaton: Arc<A>,
 }
 
 impl<A> AutomatonWeight<A>
@@ -24,12 +22,16 @@ where
     A: Automaton + Send + Sync + 'static,
 {
     /// Create a new AutomationWeight
-    pub fn new(field: Field, automaton: A) -> AutomatonWeight<A> {
-        AutomatonWeight { field, automaton }
+    pub fn new<IntoArcA: Into<Arc<A>>>(field: Field, automaton: IntoArcA) -> AutomatonWeight<A> {
+        AutomatonWeight {
+            field,
+            automaton: automaton.into(),
+        }
     }
 
     fn automaton_stream<'a>(&'a self, term_dict: &'a TermDictionary) -> TermStreamer<'a, &'a A> {
-        let term_stream_builder = term_dict.search(&self.automaton);
+        let automaton: &A = &*self.automaton;
+        let term_stream_builder = term_dict.search(automaton);
         term_stream_builder.into_stream()
     }
 }


@@ -28,12 +28,10 @@ static LEV_BUILDER: Lazy<HashMap<(u8, bool), LevenshteinAutomatonBuilder>> = Laz
/// containing a specific term that is within /// containing a specific term that is within
/// Levenshtein distance /// Levenshtein distance
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{Index, Result, Term};
/// use tantivy::collector::{Count, TopDocs}; /// use tantivy::collector::{Count, TopDocs};
/// use tantivy::query::FuzzyTermQuery; /// use tantivy::query::FuzzyTermQuery;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, Index, Result, Term};
/// ///
/// # fn main() { example().unwrap(); } /// # fn main() { example().unwrap(); }
/// fn example() -> Result<()> { /// fn example() -> Result<()> {


@@ -45,7 +45,7 @@ pub fn intersect_scorers(mut scorers: Vec<Box<dyn Scorer>>) -> Box<dyn Scorer> {
     })
 }
 
-/// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.
+/// Creates a `DocSet` that iterate through the intersection of two or more `DocSet`s.
 pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet = Box<dyn Scorer>> {
     left: TDocSet,
     right: TDocSet,


@@ -5,7 +5,7 @@ use Score;
 use SkipResult;
 
-/// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.
+/// Creates a `DocSet` that iterate through the intersection of two `DocSet`s.
 pub struct IntersectionTwoTerms<TDocSet> {
     left: TDocSet,
     right: TDocSet


@@ -12,7 +12,6 @@ mod exclude;
 mod explanation;
 mod fuzzy_query;
 mod intersection;
-mod occur;
 mod phrase_query;
 mod query;
 mod query_parser;
@@ -43,7 +42,6 @@ pub use self::exclude::Exclude;
 pub use self::explanation::Explanation;
 pub use self::fuzzy_query::FuzzyTermQuery;
 pub use self::intersection::intersect_scorers;
-pub use self::occur::Occur;
 pub use self::phrase_query::PhraseQuery;
 pub use self::query::Query;
 pub use self::query_parser::QueryParser;
@@ -55,6 +53,7 @@ pub use self::scorer::ConstScorer;
 pub use self::scorer::Scorer;
 pub use self::term_query::TermQuery;
 pub use self::weight::Weight;
+pub use tantivy_query_grammar::Occur;
 
 #[cfg(test)]
 mod tests {


@@ -1,6 +1,4 @@
-mod query_grammar;
 mod query_parser;
-mod user_input_ast;
 pub mod logical_ast;
 
 pub use self::query_parser::QueryParser;


@@ -1,9 +1,5 @@
 use super::logical_ast::*;
-use super::query_grammar::parse_to_ast;
-use super::user_input_ast::*;
 use crate::core::Index;
-use crate::query::occur::compose_occur;
-use crate::query::query_parser::logical_ast::LogicalAST;
 use crate::query::AllQuery;
 use crate::query::BooleanQuery;
 use crate::query::EmptyQuery;
@@ -16,11 +12,11 @@ use crate::schema::IndexRecordOption;
 use crate::schema::{Field, Schema};
 use crate::schema::{FieldType, Term};
 use crate::tokenizer::TokenizerManager;
-use combine::Parser;
 use std::borrow::Cow;
 use std::num::{ParseFloatError, ParseIntError};
 use std::ops::Bound;
 use std::str::FromStr;
+use tantivy_query_grammar::{UserInputAST, UserInputBound, UserInputLeaf};
 
 /// Possible error that may happen when parsing a query.
 #[derive(Debug, PartialEq, Eq, Fail)]
@@ -222,9 +218,8 @@ impl QueryParser {
     /// Parse the user query into an AST.
     fn parse_query_to_logical_ast(&self, query: &str) -> Result<LogicalAST, QueryParserError> {
-        let (user_input_ast, _remaining) = parse_to_ast()
-            .parse(query)
-            .map_err(|_| QueryParserError::SyntaxError)?;
+        let user_input_ast =
+            tantivy_query_grammar::parse_query(query).map_err(|_| QueryParserError::SyntaxError)?;
         self.compute_logical_ast(user_input_ast)
     }
@@ -399,7 +394,7 @@ impl QueryParser {
                 let mut logical_sub_queries: Vec<(Occur, LogicalAST)> = Vec::new();
                 for sub_query in sub_queries {
                     let (occur, sub_ast) = self.compute_logical_ast_with_occur(sub_query)?;
-                    let new_occur = compose_occur(default_occur, occur);
+                    let new_occur = Occur::compose(default_occur, occur);
                     logical_sub_queries.push((new_occur, sub_ast));
                 }
                 Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries)))
@@ -407,7 +402,7 @@ impl QueryParser {
             UserInputAST::Unary(left_occur, subquery) => {
                 let (right_occur, logical_sub_queries) =
                     self.compute_logical_ast_with_occur(*subquery)?;
-                Ok((compose_occur(left_occur, right_occur), logical_sub_queries))
+                Ok((Occur::compose(left_occur, right_occur), logical_sub_queries))
             }
             UserInputAST::Leaf(leaf) => {
                 let result_ast = self.compute_logical_ast_from_leaf(*leaf)?;


@@ -38,14 +38,10 @@ fn map_bound<TFrom, TTo, Transform: Fn(&TFrom) -> TTo>(
/// # Example /// # Example
/// ///
/// ```rust /// ```rust
///
/// # #[macro_use]
/// # extern crate tantivy;
/// # use tantivy::Index;
/// # use tantivy::schema::{Schema, INDEXED};
/// # use tantivy::collector::Count; /// # use tantivy::collector::Count;
/// # use tantivy::Result;
/// # use tantivy::query::RangeQuery; /// # use tantivy::query::RangeQuery;
/// # use tantivy::schema::{Schema, INDEXED};
/// # use tantivy::{doc, Index, Result};
/// # /// #
/// # fn run() -> Result<()> { /// # fn run() -> Result<()> {
/// # let mut schema_builder = Schema::builder(); /// # let mut schema_builder = Schema::builder();


@@ -4,22 +4,18 @@ use crate::schema::Field;
use crate::Result; use crate::Result;
use crate::Searcher; use crate::Searcher;
use std::clone::Clone; use std::clone::Clone;
use std::sync::Arc;
use tantivy_fst::Regex; use tantivy_fst::Regex;
// A Regex Query matches all of the documents /// A Regex Query matches all of the documents
/// containing a specific term that matches /// containing a specific term that matches
/// a regex pattern /// a regex pattern.
/// A Fuzzy Query matches all of the documents
/// containing a specific term that is within
/// Levenshtein distance
/// ///
/// ```rust /// ```rust
/// #[macro_use]
/// extern crate tantivy;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{Index, Result, Term};
/// use tantivy::collector::Count; /// use tantivy::collector::Count;
/// use tantivy::query::RegexQuery; /// use tantivy::query::RegexQuery;
/// use tantivy::schema::{Schema, TEXT};
/// use tantivy::{doc, Index, Result, Term};
/// ///
/// # fn main() { example().unwrap(); } /// # fn main() { example().unwrap(); }
/// fn example() -> Result<()> { /// fn example() -> Result<()> {
@@ -48,7 +44,7 @@ use tantivy_fst::Regex;
/// let searcher = reader.searcher(); /// let searcher = reader.searcher();
/// ///
/// let term = Term::from_field_text(title, "Diary"); /// let term = Term::from_field_text(title, "Diary");
/// let query = RegexQuery::new("d[ai]{2}ry".to_string(), title); /// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?;
/// let count = searcher.search(&query, &Count)?; /// let count = searcher.search(&query, &Count)?;
/// assert_eq!(count, 3); /// assert_eq!(count, 3);
/// Ok(()) /// Ok(())
@@ -56,30 +52,34 @@ use tantivy_fst::Regex;
/// ``` /// ```
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct RegexQuery { pub struct RegexQuery {
-    regex_pattern: String,
+    regex: Arc<Regex>,
     field: Field,
 }
 
 impl RegexQuery {
-    /// Creates a new Fuzzy Query
-    pub fn new(regex_pattern: String, field: Field) -> RegexQuery {
+    /// Creates a new RegexQuery from a given pattern
+    pub fn from_pattern(regex_pattern: &str, field: Field) -> Result<Self> {
+        let regex = Regex::new(&regex_pattern)
+            .map_err(|_| TantivyError::InvalidArgument(regex_pattern.to_string()))?;
+        Ok(RegexQuery::from_regex(regex, field))
+    }
+
+    /// Creates a new RegexQuery from a fully built Regex
+    pub fn from_regex<T: Into<Arc<Regex>>>(regex: T, field: Field) -> Self {
         RegexQuery {
-            regex_pattern,
+            regex: regex.into(),
             field,
         }
     }
 
-    fn specialized_weight(&self) -> Result<AutomatonWeight<Regex>> {
-        let automaton = Regex::new(&self.regex_pattern)
-            .map_err(|_| TantivyError::InvalidArgument(self.regex_pattern.clone()))?;
-        Ok(AutomatonWeight::new(self.field, automaton))
+    fn specialized_weight(&self) -> AutomatonWeight<Regex> {
+        AutomatonWeight::new(self.field, self.regex.clone())
     }
 }
 
 impl Query for RegexQuery {
     fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<dyn Weight>> {
-        Ok(Box::new(self.specialized_weight()?))
+        Ok(Box::new(self.specialized_weight()))
     }
 }
@@ -87,13 +87,14 @@ impl Query for RegexQuery {
 mod test {
     use super::RegexQuery;
     use crate::collector::TopDocs;
-    use crate::schema::Schema;
     use crate::schema::TEXT;
+    use crate::schema::{Field, Schema};
     use crate::tests::assert_nearly_equals;
-    use crate::Index;
+    use crate::{Index, IndexReader};
+    use std::sync::Arc;
+    use tantivy_fst::Regex;
 
-    #[test]
-    pub fn test_regex_query() {
+    fn build_test_index() -> (IndexReader, Field) {
         let mut schema_builder = Schema::builder();
         let country_field = schema_builder.add_text_field("country", TEXT);
         let schema = schema_builder.build();
@@ -109,20 +110,65 @@ mod test {
             index_writer.commit().unwrap();
         }
         let reader = index.reader().unwrap();
+        (reader, country_field)
+    }
+
+    fn verify_regex_query(
+        query_matching_one: RegexQuery,
+        query_matching_zero: RegexQuery,
+        reader: IndexReader,
+    ) {
         let searcher = reader.searcher();
         {
-            let regex_query = RegexQuery::new("jap[ao]n".to_string(), country_field);
             let scored_docs = searcher
-                .search(&regex_query, &TopDocs::with_limit(2))
+                .search(&query_matching_one, &TopDocs::with_limit(2))
                 .unwrap();
             assert_eq!(scored_docs.len(), 1, "Expected only 1 document");
             let (score, _) = scored_docs[0];
            assert_nearly_equals(1f32, score);
         }
-        let regex_query = RegexQuery::new("jap[A-Z]n".to_string(), country_field);
         let top_docs = searcher
-            .search(&regex_query, &TopDocs::with_limit(2))
+            .search(&query_matching_zero, &TopDocs::with_limit(2))
             .unwrap();
         assert!(top_docs.is_empty(), "Expected ZERO document");
     }
+
+    #[test]
+    pub fn test_regex_query() {
+        let (reader, field) = build_test_index();
+        let matching_one = RegexQuery::from_pattern("jap[ao]n", field).unwrap();
+        let matching_zero = RegexQuery::from_pattern("jap[A-Z]n", field).unwrap();
+        verify_regex_query(matching_one, matching_zero, reader);
+    }
+
+    #[test]
+    pub fn test_construct_from_regex() {
+        let (reader, field) = build_test_index();
+        let matching_one = RegexQuery::from_regex(Regex::new("jap[ao]n").unwrap(), field);
+        let matching_zero = RegexQuery::from_regex(Regex::new("jap[A-Z]n").unwrap(), field);
+        verify_regex_query(matching_one, matching_zero, reader);
+    }
+
+    #[test]
+    pub fn test_construct_from_reused_regex() {
+        let r1 = Arc::new(Regex::new("jap[ao]n").unwrap());
+        let r2 = Arc::new(Regex::new("jap[A-Z]n").unwrap());
+        let (reader, field) = build_test_index();
+        let matching_one = RegexQuery::from_regex(r1.clone(), field);
+        let matching_zero = RegexQuery::from_regex(r2.clone(), field);
+        verify_regex_query(matching_one, matching_zero, reader.clone());
+        let matching_one = RegexQuery::from_regex(r1.clone(), field);
+        let matching_zero = RegexQuery::from_regex(r2.clone(), field);
+        verify_regex_query(matching_one, matching_zero, reader.clone());
+    }
 }
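For reference, a minimal sketch of how the two constructors above might be used from application code; the schema fields, the `tantivy_fst` import path, and the error handling are assumptions for illustration, not part of this diff:

use std::sync::Arc;

use tantivy::query::RegexQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy_fst::Regex;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let country = schema_builder.add_text_field("country", TEXT);
    let city = schema_builder.add_text_field("city", TEXT);
    let _schema = schema_builder.build();

    // One-off query: the pattern is compiled inside the constructor and may fail early.
    let _one_off = RegexQuery::from_pattern("jap[ao]n", country)?;

    // Reuse: compile the regex once, then share the Arc across several queries/fields.
    let regex = Arc::new(Regex::new("jap[ao]n").expect("valid regex"));
    let _q1 = RegexQuery::from_regex(regex.clone(), country);
    let _q2 = RegexQuery::from_regex(regex, city);
    Ok(())
}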

View File

@@ -20,12 +20,10 @@ use std::fmt;
 /// * `field norm` - number of tokens in the field.
 ///
 /// ```rust
-/// #[macro_use]
-/// extern crate tantivy;
-/// use tantivy::schema::{Schema, TEXT, IndexRecordOption};
-/// use tantivy::{Index, Result, Term};
 /// use tantivy::collector::{Count, TopDocs};
 /// use tantivy::query::TermQuery;
+/// use tantivy::schema::{Schema, TEXT, IndexRecordOption};
+/// use tantivy::{doc, Index, Result, Term};
 ///
 /// # fn main() { example().unwrap(); }
 /// fn example() -> Result<()> {
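This hunk moves the doctest to edition-2018 imports. A rough standalone sketch of the same style (the field name and value are illustrative): the `doc!` macro is now brought in with a plain `use` instead of `#[macro_use] extern crate`:

use tantivy::doc;
use tantivy::schema::{Schema, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let _schema = schema_builder.build();
    // `doc!` is available through the `use tantivy::doc;` import above.
    let _document = doc!(title => "The Old Man and the Sea");
}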

View File

@@ -28,7 +28,7 @@ where
     }
 }
 
-/// Creates a `DocSet` that iterator through the intersection of two `DocSet`s.
+/// Creates a `DocSet` that iterate through the union of two or more `DocSet`s.
 pub struct Union<TScorer, TScoreCombiner = DoNothingCombiner> {
     docsets: Vec<TScorer>,
     bitsets: Box<[TinySet; HORIZON_NUM_TINYBITSETS]>,
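The corrected comment describes a union rather than an intersection. As a minimal standalone sketch (plain Rust, not tantivy's bitset-based implementation), "union of DocSets" means merging sorted doc-id lists and emitting each id once:

// Merge two ascending doc-id lists, emitting each doc id once (a union),
// as opposed to keeping only ids present in both (an intersection).
fn union(mut a: &[u32], mut b: &[u32]) -> Vec<u32> {
    let mut out = Vec::new();
    while !a.is_empty() && !b.is_empty() {
        if a[0] < b[0] {
            out.push(a[0]);
            a = &a[1..];
        } else if b[0] < a[0] {
            out.push(b[0]);
            b = &b[1..];
        } else {
            out.push(a[0]);
            a = &a[1..];
            b = &b[1..];
        }
    }
    out.extend_from_slice(a);
    out.extend_from_slice(b);
    out
}

fn main() {
    assert_eq!(union(&[1, 3, 5], &[3, 4]), vec![1, 3, 4, 5]);
}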

View File

@@ -29,22 +29,6 @@ pub enum IndexRecordOption {
 }
 
 impl IndexRecordOption {
-    /// Returns true iff the term frequency will be encoded.
-    pub fn is_termfreq_enabled(self) -> bool {
-        match self {
-            IndexRecordOption::WithFreqsAndPositions | IndexRecordOption::WithFreqs => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true iff the term positions within the document are stored as well.
-    pub fn is_position_enabled(self) -> bool {
-        match self {
-            IndexRecordOption::WithFreqsAndPositions => true,
-            _ => false,
-        }
-    }
-
     /// Returns true iff this option includes encoding
     /// term frequencies.
     pub fn has_freq(self) -> bool {
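With the duplicate accessors gone, callers rely on the remaining `has_*` methods. A small sketch of the surviving `has_freq` (the variant names come from tantivy's `IndexRecordOption` enum):

use tantivy::schema::IndexRecordOption;

fn main() {
    // `has_freq` reports whether term frequencies are encoded,
    // which is what the removed `is_termfreq_enabled` used to answer.
    assert!(IndexRecordOption::WithFreqs.has_freq());
    assert!(IndexRecordOption::WithFreqsAndPositions.has_freq());
    assert!(!IndexRecordOption::Basic.has_freq());
}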

View File

@@ -301,28 +301,26 @@ impl Schema {
         let mut doc = Document::default();
         for (field_name, json_value) in json_obj.iter() {
-            match self.get_field(field_name) {
-                Some(field) => {
-                    let field_entry = self.get_field_entry(field);
-                    let field_type = field_entry.field_type();
-                    match *json_value {
-                        JsonValue::Array(ref json_items) => {
-                            for json_item in json_items {
-                                let value = field_type.value_from_json(json_item).map_err(|e| {
-                                    DocParsingError::ValueError(field_name.clone(), e)
-                                })?;
-                                doc.add(FieldValue::new(field, value));
-                            }
-                        }
-                        _ => {
-                            let value = field_type
-                                .value_from_json(json_value)
-                                .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
-                            doc.add(FieldValue::new(field, value));
-                        }
-                    }
-                }
-                None => return Err(DocParsingError::NoSuchFieldInSchema(field_name.clone())),
+            let field = self
+                .get_field(field_name)
+                .ok_or_else(|| DocParsingError::NoSuchFieldInSchema(field_name.clone()))?;
+            let field_entry = self.get_field_entry(field);
+            let field_type = field_entry.field_type();
+            match *json_value {
+                JsonValue::Array(ref json_items) => {
+                    for json_item in json_items {
+                        let value = field_type
+                            .value_from_json(json_item)
+                            .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
+                        doc.add(FieldValue::new(field, value));
+                    }
+                }
+                _ => {
+                    let value = field_type
+                        .value_from_json(json_value)
+                        .map_err(|e| DocParsingError::ValueError(field_name.clone(), e))?;
+                    doc.add(FieldValue::new(field, value));
+                }
             }
         }
         Ok(doc)
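Assuming this hunk sits inside `Schema::parse_document`, here is a short sketch of the behaviour it preserves: fields declared in the schema parse, while an unknown field is rejected (now via the `ok_or_else` above rather than an explicit match arm):

use tantivy::schema::{Schema, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("country", TEXT);
    let schema = schema_builder.build();
    // A field declared in the schema parses fine...
    assert!(schema.parse_document(r#"{"country": "japan"}"#).is_ok());
    // ...while an unknown field surfaces as DocParsingError::NoSuchFieldInSchema.
    assert!(schema.parse_document(r#"{"city": "tokyo"}"#).is_err());
}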

View File

@@ -22,10 +22,10 @@ impl Term {
     /// Builds a term given a field, and a i64-value
     ///
     /// Assuming the term has a field id of 1, and a i64 value of 3234,
-    /// the Term will have 8 bytes.
+    /// the Term will have 12 bytes.
     ///
     /// The first four byte are dedicated to storing the field id as a u64.
-    /// The 4 following bytes are encoding the u64 value.
+    /// The 8 following bytes are encoding the u64 value.
     pub fn from_field_i64(field: Field, val: i64) -> Term {
         let val_u64: u64 = common::i64_to_u64(val);
         Term::from_field_u64(field, val_u64)
@@ -33,11 +33,11 @@ impl Term {
     /// Builds a term given a field, and a f64-value
     ///
-    /// Assuming the term has a field id of 1, and a u64 value of 3234,
-    /// the Term will have 8 bytes. <= this is wrong
+    /// Assuming the term has a field id of 1, and a f64 value of 1.5,
+    /// the Term will have 12 bytes.
     ///
     /// The first four byte are dedicated to storing the field id as a u64.
-    /// The 4 following bytes are encoding the u64 value.
+    /// The 8 following bytes are encoding the f64 as a u64 value.
     pub fn from_field_f64(field: Field, val: f64) -> Term {
         let val_u64: u64 = common::f64_to_u64(val);
         Term::from_field_u64(field, val_u64)
@@ -46,10 +46,10 @@ impl Term {
     /// Builds a term given a field, and a DateTime value
     ///
     /// Assuming the term has a field id of 1, and a timestamp i64 value of 3234,
-    /// the Term will have 8 bytes.
+    /// the Term will have 12 bytes.
     ///
     /// The first four byte are dedicated to storing the field id as a u64.
-    /// The 4 following bytes are encoding the DateTime as i64 timestamp value.
+    /// The 8 following bytes are encoding the DateTime as i64 timestamp value.
     pub fn from_field_date(field: Field, val: &DateTime) -> Term {
         let val_timestamp = val.timestamp();
         Term::from_field_i64(field, val_timestamp)
@@ -82,10 +82,10 @@ impl Term {
     /// Builds a term given a field, and a u64-value
     ///
     /// Assuming the term has a field id of 1, and a u64 value of 3234,
-    /// the Term will have 8 bytes.
+    /// the Term will have 12 bytes.
     ///
     /// The first four byte are dedicated to storing the field id as a u64.
-    /// The 4 following bytes are encoding the u64 value.
+    /// The 8 following bytes are encoding the u64 value.
     pub fn from_field_u64(field: Field, val: u64) -> Term {
         let mut term = Term(vec![0u8; INT_TERM_LEN]);
         term.set_field(field);
@@ -182,7 +182,7 @@ where
     ///
     /// # Panics
     /// ... or returns an invalid value
-    /// if the term is not a `i64` field.
+    /// if the term is not a `f64` field.
     pub fn get_f64(&self) -> f64 {
         common::u64_to_f64(BigEndian::read_u64(&self.0.as_ref()[4..]))
     }
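The corrected comments state the layout as 4 bytes of field id plus 8 bytes of value, i.e. 12 bytes in total. A sketch checking that arithmetic; the `INDEXED` options constant and the `Term::as_slice` accessor are assumptions not shown in this diff:

use tantivy::schema::{Schema, INDEXED};
use tantivy::Term;

fn main() {
    let mut schema_builder = Schema::builder();
    let num_field = schema_builder.add_u64_field("num", INDEXED);
    let _schema = schema_builder.build();
    // 4 bytes of field id + 8 bytes of big-endian u64 payload = 12 bytes.
    let term = Term::from_field_u64(num_field, 3234);
    assert_eq!(term.as_slice().len(), 12);
}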

View File

@@ -213,11 +213,9 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
 /// # Example
 ///
 /// ```rust
-/// # #[macro_use]
-/// # extern crate tantivy;
-/// # use tantivy::Index;
-/// # use tantivy::schema::{Schema, TEXT};
 /// # use tantivy::query::QueryParser;
+/// # use tantivy::schema::{Schema, TEXT};
+/// # use tantivy::{doc, Index};
 /// use tantivy::SnippetGenerator;
 ///
 /// # fn main() -> tantivy::Result<()> {

View File

@@ -1,6 +1,5 @@
 //! # Example
-//! ```
-//! extern crate tantivy;
+//! ```rust
 //! use tantivy::tokenizer::*;
 //!
 //! # fn main() {

View File

@@ -4,8 +4,7 @@
 //! You must define in your schema which tokenizer should be used for
 //! each of your fields :
 //!
-//! ```
-//! extern crate tantivy;
+//! ```rust
 //! use tantivy::schema::*;
 //!
 //! # fn main() {
@@ -65,8 +64,6 @@
 //! For instance, the `en_stem` is defined as follows.
 //!
 //! ```rust
-//! # extern crate tantivy;
-//!
 //! use tantivy::tokenizer::*;
 //!
 //! # fn main() {
@@ -80,8 +77,7 @@
 //! Once your tokenizer is defined, you need to
 //! register it with a name in your index's [`TokenizerManager`](./struct.TokenizerManager.html).
 //!
-//! ```
-//! # extern crate tantivy;
+//! ```rust
 //! # use tantivy::schema::Schema;
 //! # use tantivy::tokenizer::*;
 //! # use tantivy::Index;
@@ -101,8 +97,7 @@
 //!
 //! # Example
 //!
-//! ```
-//! extern crate tantivy;
+//! ```rust
 //! use tantivy::schema::{Schema, IndexRecordOption, TextOptions, TextFieldIndexing};
 //! use tantivy::tokenizer::*;
 //! use tantivy::Index;
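For the registration step mentioned in that module doc, a rough sketch assuming the usual `Index::tokenizers()` / `TokenizerManager::register` API; the analyzer chain and the "basic_en" name are illustrative only:

use tantivy::schema::Schema;
use tantivy::tokenizer::{LowerCaser, RemoveLongFilter, SimpleTokenizer, Tokenizer};
use tantivy::Index;

fn main() {
    let schema = Schema::builder().build();
    let index = Index::create_in_ram(schema);
    // Register a small analyzer chain under a custom name; a text field whose
    // TextFieldIndexing refers to "basic_en" will then be tokenized with it.
    index.tokenizers().register(
        "basic_en",
        SimpleTokenizer
            .filter(RemoveLongFilter::limit(40))
            .filter(LowerCaser),
    );
}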

View File

@@ -29,8 +29,7 @@ use super::{Token, TokenStream, Tokenizer};
 ///
 /// # Example
 ///
-/// ```
-/// # extern crate tantivy;
+/// ```rust
 /// use tantivy::tokenizer::*;
 /// # fn main() {
 /// let tokenizer = NgramTokenizer::new(2, 3, false);
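A minimal sketch of driving the tokenizer from that doctest; `token_stream`, `advance`, and `token` are the standard `Tokenizer`/`TokenStream` trait methods assumed here:

use tantivy::tokenizer::{NgramTokenizer, TokenStream, Tokenizer};

fn main() {
    // 2- to 3-grams taken from anywhere in the word (`false` = not prefix-only).
    let tokenizer = NgramTokenizer::new(2, 3, false);
    let mut stream = tokenizer.token_stream("hello");
    while stream.advance() {
        // Prints: he, hel, el, ell, ll, llo, lo
        println!("{}", stream.token().text);
    }
}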

View File

@@ -1,6 +1,5 @@
 //! # Example
-//! ```
-//! extern crate tantivy;
+//! ```rust
 //! use tantivy::tokenizer::*;
 //!
 //! # fn main() {

View File

@@ -1,6 +1,5 @@
 //! # Example
-//! ```
-//! extern crate tantivy;
+//! ```rust
 //! use tantivy::tokenizer::*;
 //!
 //! # fn main() {