Fix stackoverflow and add docs.

This commit is contained in:
François Massot
2023-06-30 13:49:39 +02:00
committed by Paul Masurel
parent 81330aaf89
commit 0a23201338
3 changed files with 35 additions and 45 deletions

View File

@@ -1,5 +1,7 @@
use criterion::{criterion_group, criterion_main, Criterion};
use tantivy::tokenizer::{TokenizerManager, TextAnalyzer, RemoveLongFilter, LowerCaser, SimpleTokenizer};
use tantivy::tokenizer::{
LowerCaser, RemoveLongFilter, SimpleTokenizer, TextAnalyzer, TokenizerManager,
};
const ALICE_TXT: &str = include_str!("alice.txt");
@@ -16,20 +18,6 @@ pub fn criterion_benchmark(c: &mut Criterion) {
assert_eq!(word_count, 30_731);
})
});
let mut static_analyzer = TextAnalyzer::builder(SimpleTokenizer::default())
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.build();
c.bench_function("static-tokenize-alice", |b| {
b.iter(|| {
let mut word_count = 0;
let mut token_stream = static_analyzer.token_stream(ALICE_TXT);
while token_stream.advance() {
word_count += 1;
}
assert_eq!(word_count, 30_731);
})
});
let mut dynamic_analyzer = TextAnalyzer::builder(SimpleTokenizer::default())
.dynamic()
.filter_dynamic(RemoveLongFilter::limit(40))