mirror of https://github.com/quickwit-oss/tantivy.git

fix clippy
@@ -1,7 +1,7 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 use tantivy::tokenizer::TokenizerManager;

-const ALICE_TXT: &'static str = include_str!("alice.txt");
+const ALICE_TXT: &str = include_str!("alice.txt");

 pub fn criterion_benchmark(c: &mut Criterion) {
 let tokenizer_manager = TokenizerManager::default();
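Reviewer note: this hunk matches clippy's `redundant_static_lifetimes` lint; on a `const` or `static` item the `'static` lifetime is implied, so spelling it out is redundant. A minimal standalone sketch of the pattern (my own example, not tantivy code):

    // Before: clippy::redundant_static_lifetimes fires on the explicit lifetime.
    // const GREETING: &'static str = "hello";

    // After: 'static is implied for const and static items.
    const GREETING: &str = "hello";

    fn main() {
        println!("{}", GREETING);
    }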
@@ -139,7 +139,7 @@ fn main() -> tantivy::Result<()> {
 //
 // Lets index a bunch of fake documents for the sake of
 // this example.
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 let mut index_writer = index.writer(50_000_000)?;
 index_writer.add_document(doc!(
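Reviewer note: the many `schema.clone()` removals in this commit look like `clippy::redundant_clone` fixes: `Index::create_in_ram` takes the `Schema` by value, and when the clone is the last use of the binding the copy is unnecessary. A small sketch of the general pattern, using a plain struct rather than tantivy's types:

    #[derive(Clone)]
    struct Schema {
        fields: Vec<String>,
    }

    fn consume(schema: Schema) -> usize {
        schema.fields.len()
    }

    fn main() {
        let schema = Schema { fields: vec!["title".into(), "body".into()] };
        // `schema` is not used again, so cloning before the move is redundant:
        // let n = consume(schema.clone());
        let n = consume(schema);
        println!("{} fields", n);
    }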
@@ -12,7 +12,7 @@ fn main() -> tantivy::Result<()> {
 let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);

 let schema = schema_builder.build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 let mut index_writer = index.writer(30_000_000)?;

@@ -51,7 +51,7 @@ fn main() -> tantivy::Result<()> {
 let query = BooleanQuery::new_multiterms_query(
 facets
 .iter()
-.map(|key| Term::from_facet(ingredient, &key))
+.map(|key| Term::from_facet(ingredient, key))
 .collect(),
 );
 let top_docs_by_custom_score =
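Reviewer note: the `&key` to `key` change (and the many similar ones below) is the shape of `clippy::needless_borrow`: the binding is already a reference, and adding another `&` only produces a double reference that the compiler has to coerce back down. A standalone sketch with a hypothetical `print_name` helper standing in for `Term::from_facet`:

    fn print_name(name: &str) {
        println!("name = {}", name);
    }

    fn main() {
        let names = vec!["salt".to_string(), "pepper".to_string()];
        // `name` is already a `&String` inside the closure, so `&name` would be a
        // needless extra borrow (`&&String`) that clippy::needless_borrow flags.
        names.iter().for_each(|name| print_name(name));
    }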
@@ -22,7 +22,7 @@ fn main() -> tantivy::Result<()> {
 let title = schema_builder.add_text_field("title", TEXT | STORED);
 let schema = schema_builder.build();

-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 let mut index_writer = index.writer_with_num_threads(1, 50_000_000)?;
 index_writer.add_document(doc!(title => "The Old Man and the Sea"));

@@ -82,7 +82,7 @@ fn main() -> tantivy::Result<()> {
 }]
 }"#;

-let short_man_doc = schema.parse_document(&short_man_json)?;
+let short_man_doc = schema.parse_document(short_man_json)?;

 index_writer.add_document(short_man_doc);

@@ -25,7 +25,7 @@ fn main() -> tantivy::Result<()> {
 let schema = schema_builder.build();

 // # Indexing documents
-let index = Index::create_in_dir(&index_path, schema.clone())?;
+let index = Index::create_in_dir(&index_path, schema)?;

 let mut index_writer = index.writer(50_000_000)?;
@@ -1,4 +1,4 @@
-use tantivy;

 use tantivy::schema::*;

 // # Document from json

@@ -22,7 +22,7 @@ fn main() -> tantivy::Result<()> {
 }"#;

 // We can parse our document
-let _mice_and_men_doc = schema.parse_document(&mice_and_men_doc_json)?;
+let _mice_and_men_doc = schema.parse_document(mice_and_men_doc_json)?;

 // Multi-valued field are allowed, they are
 // expressed in JSON by an array.
@@ -31,7 +31,7 @@ fn main() -> tantivy::Result<()> {
 "title": ["Frankenstein", "The Modern Prometheus"],
 "year": 1818
 }"#;
-let _frankenstein_doc = schema.parse_document(&frankenstein_json)?;
+let _frankenstein_doc = schema.parse_document(frankenstein_json)?;

 // Note that the schema is saved in your index directory.
 //

@@ -1080,7 +1080,7 @@ mod tests {
 query: &str,
 query_field: Field,
 schema: Schema,
-mut doc_adder: impl FnMut(&mut IndexWriter) -> (),
+mut doc_adder: impl FnMut(&mut IndexWriter),
 ) -> (Index, Box<dyn Query>) {
 let index = Index::create_in_ram(schema);
 let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
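Reviewer note: dropping `-> ()` from the `impl FnMut(&mut IndexWriter)` bound matches `clippy::unused_unit`; a closure that returns nothing already has the unit return type, so writing it out is noise. A tiny sketch with a made-up `apply_twice` helper:

    // clippy::unused_unit would flag `impl FnMut(&mut Vec<u32>) -> ()` here;
    // the `-> ()` adds nothing, so the bound is written without it.
    fn apply_twice(mut f: impl FnMut(&mut Vec<u32>)) -> Vec<u32> {
        let mut values = Vec::new();
        f(&mut values);
        f(&mut values);
        values
    }

    fn main() {
        let result = apply_twice(|v| v.push(7));
        println!("{:?}", result); // [7, 7]
    }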
@@ -369,7 +369,7 @@ mod tests {
 schema::{Schema, TEXT},
 IndexSettings, IndexSortByField, Order,
 };
-use serde_json;

 #[test]
 fn test_serialize_metas() {

@@ -22,7 +22,7 @@ use std::sync::atomic;
 pub struct SegmentId(Uuid);

 #[cfg(test)]
-static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(|| atomic::AtomicUsize::default());
+static AUTO_INC_COUNTER: Lazy<atomic::AtomicUsize> = Lazy::new(atomic::AtomicUsize::default);

 #[cfg(test)]
 const ZERO_ARRAY: [u8; 8] = [0u8; 8];
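Reviewer note: `Lazy::new(|| atomic::AtomicUsize::default())` to `Lazy::new(atomic::AtomicUsize::default)` is the classic `clippy::redundant_closure` fix: a closure that only forwards to another function can be replaced by the function itself. The same idea without `once_cell`, sketched with a plain higher-order function:

    fn build_with<T>(factory: impl Fn() -> T) -> T {
        factory()
    }

    fn main() {
        // Redundant closure: the closure body only calls `String::new`.
        let a: String = build_with(|| String::new());
        // Equivalent, and what clippy::redundant_closure suggests:
        let b: String = build_with(String::new);
        println!("{} {}", a.len(), b.len());
    }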
@@ -211,7 +211,7 @@ mod tests {
 assert_eq!(right.read_bytes()?.as_slice(), b"");
 }
 {
-let (left, right) = file_slice.clone().split_from_end(2);
+let (left, right) = file_slice.split_from_end(2);
 assert_eq!(left.read_bytes()?.as_slice(), b"abcd");
 assert_eq!(right.read_bytes()?.as_slice(), b"ef");
 }

@@ -430,7 +430,7 @@ mod tests_mmap_specific {
 assert_eq!(read_file.as_slice(), &[3u8, 4u8, 5u8]);
 assert!(managed_directory.list_damaged().unwrap().is_empty());

-let mut corrupted_path = tempdir_path.clone();
+let mut corrupted_path = tempdir_path;
 corrupted_path.push(test_path2);
 let mut file = OpenOptions::new().write(true).open(&corrupted_path)?;
 file.write_all(&[255u8])?;
@@ -166,26 +166,26 @@ fn test_write_create_the_file(directory: &dyn Directory) {
 fn test_directory_delete(directory: &dyn Directory) -> crate::Result<()> {
 let test_path: &'static Path = Path::new("some_path_for_test");
 assert!(directory.open_read(test_path).is_err());
-let mut write_file = directory.open_write(&test_path)?;
+let mut write_file = directory.open_write(test_path)?;
 write_file.write_all(&[1, 2, 3, 4])?;
 write_file.flush()?;
 {
-let read_handle = directory.open_read(&test_path)?.read_bytes()?;
+let read_handle = directory.open_read(test_path)?.read_bytes()?;
 assert_eq!(read_handle.as_slice(), &[1u8, 2u8, 3u8, 4u8]);
 // Mapped files can't be deleted on Windows
 if !cfg!(windows) {
-assert!(directory.delete(&test_path).is_ok());
+assert!(directory.delete(test_path).is_ok());
 assert_eq!(read_handle.as_slice(), &[1u8, 2u8, 3u8, 4u8]);
 }
 assert!(directory.delete(Path::new("SomeOtherPath")).is_err());
 }

 if cfg!(windows) {
-assert!(directory.delete(&test_path).is_ok());
+assert!(directory.delete(test_path).is_ok());
 }

-assert!(directory.open_read(&test_path).is_err());
-assert!(directory.delete(&test_path).is_err());
+assert!(directory.open_read(test_path).is_err());
+assert!(directory.delete(test_path).is_err());
 Ok(())
 }
@@ -405,9 +405,9 @@ mod tests {
 .unwrap();
 serializer.close().unwrap();
 }
-let file = directory.open_read(&path).unwrap();
+let file = directory.open_read(path).unwrap();
 //assert_eq!(file.len(), 17710 as usize); //bitpacked size
-assert_eq!(file.len(), 10175 as usize); // linear interpol size
+assert_eq!(file.len(), 10175_usize); // linear interpol size
 {
 let fast_fields_composite = CompositeFile::open(&file)?;
 let data = fast_fields_composite.open_read(i64_field).unwrap();
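Reviewer note: `10175 as usize` to `10175_usize` (and `1990 as i64`, `1 as u64` further down) looks like `clippy::unnecessary_cast` on integer literals: a suffix expresses the type directly, so no `as` conversion is needed. Sketch:

    fn main() {
        // Casting an unsuffixed literal is flagged by clippy ...
        let old_style = 10175 as usize;
        // ... because the suffixed literal already has the right type.
        let new_style = 10175_usize;
        assert_eq!(old_style, new_style);
        println!("both are {}", new_style);
    }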
@@ -447,7 +447,7 @@ mod tests {
 serializer.close().unwrap();
 }

-let file = directory.open_read(&path).unwrap();
+let file = directory.open_read(path).unwrap();
 {
 let fast_fields_composite = CompositeFile::open(&file).unwrap();
 let data = fast_fields_composite.open_read(i64_field).unwrap();

@@ -480,7 +480,7 @@ mod tests {
 fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
 serializer.close()?;
 }
-let file = directory.open_read(&path)?;
+let file = directory.open_read(path)?;
 {
 let fast_fields_composite = CompositeFile::open(&file)?;
 let data = fast_fields_composite.open_read(*FIELD).unwrap();

@@ -90,7 +90,7 @@ mod tests {
 {
 let parser = QueryParser::for_index(&index, vec![date_field]);
 let query = parser
-.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()).to_string())
+.parse_query(&format!("\"{}\"", first_time_stamp.to_rfc3339()))
 .expect("could not parse query");
 let results = searcher
 .search(&query, &TopDocs::with_limit(5))
@@ -121,7 +121,7 @@ mod tests {
 {
 let parser = QueryParser::for_index(&index, vec![date_field]);
 let query = parser
-.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()).to_string())
+.parse_query(&format!("\"{}\"", two_secs_ahead.to_rfc3339()))
 .expect("could not parse query");
 let results = searcher
 .search(&query, &TopDocs::with_limit(5))

@@ -47,7 +47,7 @@ fn codec_estimation<T: FastFieldCodecSerializer, A: FastFieldDataAccess>(
 return;
 }
 let (ratio, name, id) = (
-T::estimate(fastfield_accessor, stats.clone()),
+T::estimate(fastfield_accessor, stats),
 T::NAME,
 T::ID,
 );

@@ -38,7 +38,7 @@ fn test_functional_store() -> crate::Result<()> {
 for iteration in 0..500 {
 dbg!(iteration);
 let num_docs: usize = rng.gen_range(0..4);
-if doc_set.len() >= 1 {
+if !doc_set.is_empty() {
 let doc_to_remove_id = rng.gen_range(0..doc_set.len());
 let removed_doc_id = doc_set.swap_remove(doc_to_remove_id);
 index_writer.delete_term(Term::from_field_u64(id_field, removed_doc_id));
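Reviewer note: `doc_set.len() >= 1` to `!doc_set.is_empty()` (and `fieldnorms.len() == 0` to `fieldnorms.is_empty()` later) corresponds to `clippy::len_zero`: emptiness checks read better through `is_empty`, which every collection exposing `len` is expected to provide. Sketch:

    fn main() {
        let mut doc_set: Vec<u64> = vec![42];
        // clippy::len_zero prefers is_empty() over comparing len() with 0 or 1:
        // if doc_set.len() >= 1 { ... }
        if !doc_set.is_empty() {
            let removed = doc_set.swap_remove(0);
            println!("removed doc {}", removed);
        }
        assert!(doc_set.is_empty());
    }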
@@ -88,19 +88,17 @@ fn test_functional_indexing() -> crate::Result<()> {
 &searcher,
 &committed_docs.iter().cloned().collect::<Vec<u64>>(),
 )?;
+} else if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
+let doc_id_term = Term::from_field_u64(id_field, random_val);
+index_writer.delete_term(doc_id_term);
 } else {
-if committed_docs.remove(&random_val) || uncommitted_docs.remove(&random_val) {
-let doc_id_term = Term::from_field_u64(id_field, random_val);
-index_writer.delete_term(doc_id_term);
-} else {
-uncommitted_docs.insert(random_val);
-let mut doc = Document::new();
-doc.add_u64(id_field, random_val);
-for i in 1u64..10u64 {
-doc.add_u64(multiples_field, random_val * i);
-}
-index_writer.add_document(doc);
+uncommitted_docs.insert(random_val);
+let mut doc = Document::new();
+doc.add_u64(id_field, random_val);
+for i in 1u64..10u64 {
+doc.add_u64(multiples_field, random_val * i);
+}
+index_writer.add_document(doc);
 }
 }
 Ok(())
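Reviewer note: this hunk reads like a `clippy::collapsible_else_if` fix: an `else { if … { … } else { … } }` ladder is flattened into `else if … { … } else { … }`, which removes one nesting level without changing behaviour. A compact sketch of the same reshaping:

    fn classify(n: i32) -> &'static str {
        // Before (flagged by clippy::collapsible_else_if):
        // if n < 0 {
        //     "negative"
        // } else {
        //     if n == 0 { "zero" } else { "positive" }
        // }
        if n < 0 {
            "negative"
        } else if n == 0 {
            "zero"
        } else {
            "positive"
        }
    }

    fn main() {
        for n in [-3, 0, 9] {
            println!("{} is {}", n, classify(n));
        }
    }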
@@ -1,6 +1,6 @@
 use super::operation::DeleteOperation;
 use crate::Opstamp;
-use std::mem;

 use std::ops::DerefMut;
 use std::sync::{Arc, RwLock, Weak};

@@ -105,7 +105,7 @@ impl DeleteQueue {
 return None;
 }

-let delete_operations = mem::replace(&mut self_wlock.writer, vec![]);
+let delete_operations = std::mem::take(&mut self_wlock.writer);

 let new_block = Arc::new(Block {
 operations: Arc::from(delete_operations.into_boxed_slice()),
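Reviewer note: `mem::replace(&mut x, vec![])` to `std::mem::take(&mut x)` matches `clippy::mem_replace_with_default`: when the replacement value is just the type's `Default`, `mem::take` says the same thing more directly (and the now-unneeded `use std::mem;` import is dropped above). Sketch:

    fn main() {
        let mut pending: Vec<&str> = vec!["op1", "op2", "op3"];

        // Equivalent to std::mem::replace(&mut pending, Vec::new()):
        // the old contents move out and an empty Vec is left behind.
        let drained = std::mem::take(&mut pending);

        println!("drained {} ops, {} left", drained.len(), pending.len());
        assert!(pending.is_empty());
    }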
@@ -286,7 +286,7 @@ mod tests {
 operations_it.advance();
 }
 {
-let mut operations_it = snapshot.clone();
+let mut operations_it = snapshot;
 assert_eq!(operations_it.get().unwrap().opstamp, 1);
 operations_it.advance();
 assert_eq!(operations_it.get().unwrap().opstamp, 2);

@@ -355,7 +355,7 @@ impl IndexWriter {
 // dropping the last reference to the segment_updater.
 self.drop_sender();

-let former_workers_handles = mem::replace(&mut self.workers_join_handle, vec![]);
+let former_workers_handles = std::mem::take(&mut self.workers_join_handle);
 for join_handle in former_workers_handles {
 join_handle
 .join()

@@ -625,7 +625,7 @@ impl IndexWriter {
 // and recreate a new one.
 self.recreate_document_channel();

-let former_workers_join_handle = mem::replace(&mut self.workers_join_handle, Vec::new());
+let former_workers_join_handle = std::mem::take(&mut self.workers_join_handle);

 for worker_handle in former_workers_join_handle {
 let indexing_worker_result = worker_handle
@@ -216,7 +216,7 @@ impl IndexMerger {
 let mut readers_with_min_sort_values = readers
 .into_iter()
 .map(|reader| {
-let accessor = Self::get_sort_field_accessor(&reader, &sort_by_field)?;
+let accessor = Self::get_sort_field_accessor(&reader, sort_by_field)?;
 Ok((reader, accessor.min_value()))
 })
 .collect::<crate::Result<Vec<_>>>()?;

@@ -322,7 +322,7 @@ impl IndexMerger {
 .expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen.");
 compute_min_max_val(&u64_reader, reader.max_doc(), reader.delete_bitset())
 })
-.filter_map(|x| x)
+.flatten()
 .reduce(|a, b| {
 (a.0.min(b.0), a.1.max(b.1))
 }).expect("Unexpected error, empty readers in IndexMerger");
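Reviewer note: `.filter_map(|x| x)` to `.flatten()` is the usual fix for clippy's filter-map-identity warning: on an iterator of `Option`s the identity `filter_map` and `flatten` do exactly the same thing, keeping the `Some` payloads and dropping the `None`s. Sketch:

    fn main() {
        let maybe_ranges = vec![Some((1u64, 5u64)), None, Some((0, 9))];

        // .filter_map(|x| x) over Option values is just .flatten():
        let (min, max) = maybe_ranges
            .into_iter()
            .flatten()
            .reduce(|a, b| (a.0.min(b.0), a.1.max(b.1)))
            .expect("at least one Some value");

        println!("min = {}, max = {}", min, max);
    }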
@@ -404,7 +404,7 @@ impl IndexMerger {
 reader: &SegmentReader,
 sort_by_field: &IndexSortByField,
 ) -> crate::Result<impl FastFieldReader<u64>> {
-let field_id = expect_field_id_for_sort_field(&reader.schema(), &sort_by_field)?; // for now expect fastfield, but not strictly required
+let field_id = expect_field_id_for_sort_field(reader.schema(), sort_by_field)?; // for now expect fastfield, but not strictly required
 let value_accessor = reader.fast_fields().u64_lenient(field_id)?;
 Ok(value_accessor)
 }

@@ -716,7 +716,7 @@ mod tests {

 let seg_ids = index.searchable_segment_ids()?;
 // docs exist, should have at least 1 segment
-assert!(seg_ids.len() > 0);
+assert!(!seg_ids.is_empty());

 let term_vals = vec!["a", "b", "c", "d", "e", "f"];
 for term_val in term_vals {

@@ -191,7 +191,7 @@ impl SegmentWriter {
 .process(&mut |token| {
 term_buffer.set_text(&token.text);
 let unordered_term_id =
-multifield_postings.subscribe(doc_id, &term_buffer);
+multifield_postings.subscribe(doc_id, term_buffer);
 unordered_term_id_opt = Some(unordered_term_id);
 });
 if let Some(unordered_term_id) = unordered_term_id_opt {
@@ -252,7 +252,7 @@ impl SegmentWriter {
 .u64_value()
 .ok_or_else(make_schema_error)?;
 term_buffer.set_u64(u64_val);
-multifield_postings.subscribe(doc_id, &term_buffer);
+multifield_postings.subscribe(doc_id, term_buffer);
 }
 }
 FieldType::Date(_) => {

@@ -263,7 +263,7 @@ impl SegmentWriter {
 .date_value()
 .ok_or_else(make_schema_error)?;
 term_buffer.set_i64(date_val.timestamp());
-multifield_postings.subscribe(doc_id, &term_buffer);
+multifield_postings.subscribe(doc_id, term_buffer);
 }
 }
 FieldType::I64(_) => {

@@ -274,7 +274,7 @@ impl SegmentWriter {
 .i64_value()
 .ok_or_else(make_schema_error)?;
 term_buffer.set_i64(i64_val);
-multifield_postings.subscribe(doc_id, &term_buffer);
+multifield_postings.subscribe(doc_id, term_buffer);
 }
 }
 FieldType::F64(_) => {

@@ -285,7 +285,7 @@ impl SegmentWriter {
 .f64_value()
 .ok_or_else(make_schema_error)?;
 term_buffer.set_f64(f64_val);
-multifield_postings.subscribe(doc_id, &term_buffer);
+multifield_postings.subscribe(doc_id, term_buffer);
 }
 }
 FieldType::Bytes(_) => {

@@ -296,7 +296,7 @@ impl SegmentWriter {
 .bytes_value()
 .ok_or_else(make_schema_error)?;
 term_buffer.set_bytes(bytes);
-self.multifield_postings.subscribe(doc_id, &term_buffer);
+self.multifield_postings.subscribe(doc_id, term_buffer);
 }
 }
 }
@@ -933,7 +933,7 @@ mod tests {
 let id = schema_builder.add_u64_field("id", INDEXED);
 let schema = schema_builder.build();

-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);
 let index_reader = index.reader()?;

 let mut index_writer = index.writer_for_tests()?;

@@ -972,7 +972,7 @@ mod tests {
 let searcher = index_reader.searcher();
 let segment_ids: Vec<SegmentId> = searcher
 .segment_readers()
-.into_iter()
+.iter()
 .map(|reader| reader.segment_id())
 .collect();
 block_on(index_writer.merge(&segment_ids)).unwrap();

@@ -46,7 +46,7 @@ pub mod tests {
 fn create_positions_data(vals: &[u32]) -> crate::Result<OwnedBytes> {
 let mut positions_buffer = vec![];
 let mut serializer = PositionSerializer::new(&mut positions_buffer);
-serializer.write_positions_delta(&vals);
+serializer.write_positions_delta(vals);
 serializer.close_term()?;
 serializer.close()?;
 Ok(OwnedBytes::new(positions_buffer))
@@ -169,7 +169,7 @@ pub mod tests {
 let positions_delta: Vec<u32> = (0..2_000_000).collect();
 let positions_data = create_positions_data(&positions_delta[..])?;
 assert_eq!(positions_data.len(), 5003499);
-let mut position_reader = PositionReader::open(positions_data.clone())?;
+let mut position_reader = PositionReader::open(positions_data)?;
 let mut buf = [0u32; 256];
 position_reader.read(128, &mut buf);
 for i in 0..256 {

@@ -57,7 +57,7 @@ mod sse2 {
 fn test_linear_search_sse2_128_u32() {
 let mut block = [0u32; COMPRESSION_BLOCK_SIZE];
 for el in 0u32..128u32 {
-block[el as usize] = el * 2 + 1 << 18;
+block[el as usize] = (el * 2 + 1) << 18;
 }
 let target = block[64] + 1;
 assert_eq!(linear_search_sse2_128(&AlignedBuffer(block), target), 65);
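Reviewer note: `el * 2 + 1 << 18` to `(el * 2 + 1) << 18` is `clippy::precedence` territory. In Rust, `<<` binds more loosely than `+` and `*`, so both spellings already evaluate the same way; the parentheses only make that intent explicit. A quick check:

    fn main() {
        let el: u32 = 3;
        // Shift binds more loosely than arithmetic, so these are identical;
        // clippy::precedence just asks for parentheses to make it obvious.
        let implicit = el * 2 + 1 << 18;
        let explicit = (el * 2 + 1) << 18;
        assert_eq!(implicit, explicit);
        println!("{} == {}", implicit, explicit);
    }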
@@ -91,7 +91,7 @@ fn exponential_search(arr: &[u32], target: u32) -> Range<usize> {

 #[inline(never)]
 fn galloping(block_docs: &[u32], target: u32) -> usize {
-let range = exponential_search(&block_docs, target);
+let range = exponential_search(block_docs, target);
 range.start + linear_search(&block_docs[range], target)
 }

@@ -13,11 +13,7 @@ use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};

 fn max_score<I: Iterator<Item = Score>>(mut it: I) -> Option<Score> {
-if let Some(first) = it.next() {
-Some(it.fold(first, Score::max))
-} else {
-None
-}
+it.next().map(|first| it.fold(first, Score::max))
 }

 /// `BlockSegmentPostings` is a cursor iterating over blocks
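Reviewer note: rewriting `max_score` from an `if let Some(first) … else { None }` block into `it.next().map(|first| it.fold(first, Score::max))` is the shape clippy's manual-map suggestion takes: when both branches only wrap or forward a value, `Option::map` does it in one expression. Sketch with plain `f32` scores:

    fn max_score<I: Iterator<Item = f32>>(mut it: I) -> Option<f32> {
        // Before: if let Some(first) = it.next() { Some(it.fold(first, f32::max)) } else { None }
        // After: the same logic expressed through Option::map.
        it.next().map(|first| it.fold(first, f32::max))
    }

    fn main() {
        let scores = vec![0.3f32, 1.7, 0.9];
        println!("{:?}", max_score(scores.into_iter())); // Some(1.7)
        println!("{:?}", max_score(std::iter::empty::<f32>())); // None
    }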
@@ -303,7 +303,7 @@ pub mod tests {
 assert!(encoded_data.len() <= expected_length);
 let mut decoder = BlockDecoder::default();
 let consumed_num_bytes =
-decoder.uncompress_vint_sorted(&encoded_data, *offset, input.len(), PADDING_VALUE);
+decoder.uncompress_vint_sorted(encoded_data, *offset, input.len(), PADDING_VALUE);
 assert_eq!(consumed_num_bytes, encoded_data.len());
 assert_eq!(input, decoder.output_array());
 for i in input.len()..COMPRESSION_BLOCK_SIZE {

@@ -153,8 +153,8 @@ pub mod tests {

 #[test]
 pub fn test_drop_token_that_are_too_long() -> crate::Result<()> {
-let ok_token_text: String = iter::repeat('A').take(MAX_TOKEN_LEN).collect();
-let mut exceeding_token_text: String = iter::repeat('A').take(MAX_TOKEN_LEN + 1).collect();
+let ok_token_text: String = "A".repeat(MAX_TOKEN_LEN);
+let mut exceeding_token_text: String = "A".repeat(MAX_TOKEN_LEN + 1);
 exceeding_token_text.push_str(" hello");
 let mut schema_builder = Schema::builder();
 let text_options = TextOptions::default().set_indexing_options(
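Reviewer note: the `iter::repeat('A').take(n).collect::<String>()` lines become `"A".repeat(n)`; `str::repeat` builds the same string with less machinery (this one reads more like a manual cleanup than a specific lint). Sketch:

    fn main() {
        let max_token_len = 8; // stand-in for tantivy's MAX_TOKEN_LEN
        let via_iter: String = std::iter::repeat('A').take(max_token_len).collect();
        let via_repeat: String = "A".repeat(max_token_len);
        assert_eq!(via_iter, via_repeat);
        println!("{}", via_repeat);
    }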
@@ -164,7 +164,7 @@ pub mod tests {
 );
 let text_field = schema_builder.add_text_field("text", text_options);
 let schema = schema_builder.build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);
 index
 .tokenizers()
 .register("simple_no_truncation", SimpleTokenizer);

@@ -229,7 +229,7 @@ pub mod tests {
 segment_writer.add_document(op, &schema).unwrap();
 }
 for i in 2..1000 {
-let mut text: String = iter::repeat("e ").take(i).collect();
+let mut text: String = "e ".repeat(i);
 text.push_str(" a");
 let op = AddOperation {
 opstamp: 2u64,

@@ -235,7 +235,7 @@ pub trait PostingsWriter {
 term_index,
 doc_id,
 token.position as u32,
-&term_buffer,
+term_buffer,
 heap,
 );
 } else {
@@ -282,7 +282,7 @@ impl Recorder for TfAndPositionRecorder {
 doc_id_and_positions
 .push((doc_id_map.get_new_doc_id(doc), buffer_positions.to_vec()));
 } else {
-serializer.write_doc(doc, buffer_positions.len() as u32, &buffer_positions);
+serializer.write_doc(doc, buffer_positions.len() as u32, buffer_positions);
 }
 }
 if doc_id_map.is_some() {

@@ -107,7 +107,7 @@ impl SegmentPostings {
 let fieldnorm_reader = fieldnorms.map(FieldNormReader::for_test);
 let average_field_norm = fieldnorms
 .map(|fieldnorms| {
-if fieldnorms.len() == 0 {
+if fieldnorms.is_empty() {
 return 0.0;
 }
 let total_num_tokens: u64 = fieldnorms

@@ -184,7 +184,7 @@ impl DocSet for SegmentPostings {

 // At this point we are on the block, that might contain our document.
 let output = self.block_cursor.docs_aligned();
-self.cur = self.block_searcher.search_in_block(&output, target);
+self.cur = self.block_searcher.search_in_block(output, target);

 // The last block is not full and padded with the value TERMINATED,
 // so that we are guaranteed to have at least doc in the block (a real one or the padding)
@@ -356,7 +356,7 @@ impl<W: Write> PostingsSerializer<W> {
 // encode the doc ids
 let (num_bits, block_encoded): (u8, &[u8]) = self
 .block_encoder
-.compress_block_sorted(&self.block.doc_ids(), self.last_doc_id_encoded);
+.compress_block_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
 self.last_doc_id_encoded = self.block.last_doc();
 self.skip_write
 .write_doc(self.last_doc_id_encoded, num_bits);

@@ -366,7 +366,7 @@ impl<W: Write> PostingsSerializer<W> {
 if self.mode.has_freq() {
 let (num_bits, block_encoded): (u8, &[u8]) = self
 .block_encoder
-.compress_block_unsorted(&self.block.term_freqs());
+.compress_block_unsorted(self.block.term_freqs());
 self.postings_write.extend(block_encoded);
 self.skip_write.write_term_freq(num_bits);
 if self.mode.has_positions() {

@@ -426,7 +426,7 @@ impl<W: Write> PostingsSerializer<W> {
 {
 let block_encoded = self
 .block_encoder
-.compress_vint_sorted(&self.block.doc_ids(), self.last_doc_id_encoded);
+.compress_vint_sorted(self.block.doc_ids(), self.last_doc_id_encoded);
 self.postings_write.write_all(block_encoded)?;
 }
 // ... Idem for term frequencies
@@ -151,7 +151,7 @@ impl TermHashMap {
 pub fn iter(&self) -> Iter<'_> {
 Iter {
 inner: self.occupied.iter(),
-hashmap: &self,
+hashmap: self,
 }
 }

@@ -262,7 +262,7 @@ mod tests {

 let mut vanilla_hash_map = HashMap::new();
 let mut iter_values = hash_map.iter();
-while let Some((key, addr, _)) = iter_values.next() {
+for (key, addr, _) in iter_values {
 let val: u32 = hash_map.heap.read(addr);
 vanilla_hash_map.insert(key.to_owned(), val);
 }
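Reviewer note: `while let Some(x) = iter.next()` to `for x in iter` is `clippy::while_let_on_iterator`: when the loop consumes the iterator to the end and never touches it otherwise, a `for` loop states that directly. Sketch:

    use std::collections::HashMap;

    fn main() {
        let entries = vec![("hello", 1u32), ("world", 2)];
        let mut map = HashMap::new();

        // Instead of: let mut it = entries.iter(); while let Some((k, v)) = it.next() { ... }
        for (key, val) in entries.iter() {
            map.insert(key.to_string(), *val);
        }

        println!("{} keys", map.len());
    }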
@@ -251,7 +251,7 @@ mod tests {

 impl PartialEq for Float {
 fn eq(&self, other: &Self) -> bool {
-self.cmp(&other) == Ordering::Equal
+self.cmp(other) == Ordering::Equal
 }
 }

@@ -289,7 +289,7 @@ mod tests {
 if !nearly_equals(score, limit) {
 checkpoints.push((doc, score));
 }
-return limit;
+limit
 });
 checkpoints
 }

@@ -368,10 +368,10 @@ mod tests {
 .iter()
 .map(|posting_list| {
 posting_list
-.into_iter()
+.iter()
 .cloned()
 .flat_map(|(doc, term_freq)| {
-(0 as u32..REPEAT as u32).map(move |offset| {
+(0_u32..REPEAT as u32).map(move |offset| {
 (
 doc * (REPEAT as u32) + offset,
 if offset == 0 { term_freq } else { 1 },
@@ -329,7 +329,7 @@ impl MoreLikeThis {
 continue;
 }

-let doc_freq = searcher.doc_freq(&term)?;
+let doc_freq = searcher.doc_freq(term)?;

 // ignore terms with less than min_doc_frequency
 if self

@@ -53,7 +53,7 @@ impl PhraseWeight {
 for &(offset, ref term) in &self.phrase_terms {
 if let Some(postings) = reader
 .inverted_index(term.field())?
-.read_postings(&term, IndexRecordOption::WithFreqsAndPositions)?
+.read_postings(term, IndexRecordOption::WithFreqsAndPositions)?
 {
 term_postings_list.push((offset, postings));
 } else {

@@ -71,7 +71,7 @@ impl PhraseWeight {
 for &(offset, ref term) in &self.phrase_terms {
 if let Some(postings) = reader
 .inverted_index(term.field())?
-.read_postings_no_deletes(&term, IndexRecordOption::WithFreqsAndPositions)?
+.read_postings_no_deletes(term, IndexRecordOption::WithFreqsAndPositions)?
 {
 term_postings_list.push((offset, postings));
 } else {
@@ -722,7 +722,7 @@ mod test {
 let is_not_indexed_err = |query: &str| {
 let result: Result<Box<dyn Query>, QueryParserError> = query_parser.parse_query(query);
 if let Err(QueryParserError::FieldNotIndexed(field_name)) = result {
-Some(field_name.clone())
+Some(field_name)
 } else {
 None
 }

@@ -91,8 +91,8 @@ impl RangeQuery {
 RangeQuery {
 field,
 value_type,
-left_bound: map_bound(&left_bound, &verify_and_unwrap_term),
-right_bound: map_bound(&right_bound, &verify_and_unwrap_term),
+left_bound: map_bound(left_bound, &verify_and_unwrap_term),
+right_bound: map_bound(right_bound, &verify_and_unwrap_term),
 }
 }

@@ -493,7 +493,7 @@ mod tests {
 let year = schema.get_field("year").unwrap();
 index_writer.add_document(doc!(
 title => "hemoglobin blood",
-year => 1990 as i64
+year => 1990_i64
 ));
 index_writer.commit()?;
 let reader = index.reader()?;
@@ -170,8 +170,8 @@ mod test {
 verify_regex_query(matching_one, matching_zero, reader.clone());

 let matching_one = RegexQuery::from_regex(r1, field);
-let matching_zero = RegexQuery::from_regex(r2.clone(), field);
+let matching_zero = RegexQuery::from_regex(r2, field);

-verify_regex_query(matching_one, matching_zero, reader.clone());
+verify_regex_query(matching_one, matching_zero, reader);
 }
 }

@@ -253,7 +253,7 @@ mod tests {
 }

 fn test_block_wand_aux(term_query: &TermQuery, searcher: &Searcher) -> crate::Result<()> {
-let term_weight = term_query.specialized_weight(&searcher, true)?;
+let term_weight = term_query.specialized_weight(searcher, true)?;
 for reader in searcher.segment_readers() {
 let mut block_max_scores = vec![];
 let mut block_max_scores_b = vec![];

@@ -309,7 +309,7 @@ mod tests {
 }
 writer.commit()?;
 let term_query = TermQuery::new(
-Term::from_field_text(text_field, &"bbbb"),
+Term::from_field_text(text_field, "bbbb"),
 IndexRecordOption::WithFreqs,
 );
 let segment_ids: Vec<SegmentId>;
@@ -318,8 +318,7 @@ mod tests {
 let docset_factory = || {
 let res: Box<dyn DocSet> = Box::new(Union::<_, DoNothingCombiner>::from(
 docs_list
-.iter()
-.map(|docs| docs.clone())
+.iter().cloned()
 .map(VecDocSet::from)
 .map(|docset| ConstScorer::new(docset, 1.0))
 .collect::<Vec<_>>(),

@@ -779,7 +779,7 @@ mod tests {
 }
 ]"#;
 let tmp_schema: Schema =
-serde_json::from_str(&schema_content).expect("error while reading json");
+serde_json::from_str(schema_content).expect("error while reading json");
 for (_field, field_entry) in tmp_schema.fields() {
 schema_builder.add_field(field_entry.clone());
 }
@@ -137,7 +137,7 @@ fn search_fragments<'a>(
 };
 fragment = FragmentCandidate::new(next.offset_from);
 }
-fragment.try_add_token(next, &terms);
+fragment.try_add_token(next, terms);
 }
 if fragment.score > 0.0 {
 fragments.push(fragment)

@@ -286,8 +286,8 @@ impl SnippetGenerator {
 /// Generates a snippet for the given text.
 pub fn snippet(&self, text: &str) -> Snippet {
 let fragment_candidates =
-search_fragments(&self.tokenizer, &text, &self.terms_text, self.max_num_chars);
-select_best_fragment_combination(&fragment_candidates[..], &text)
+search_fragments(&self.tokenizer, text, &self.terms_text, self.max_num_chars);
+select_best_fragment_combination(&fragment_candidates[..], text)
 }
 }

@@ -296,7 +296,7 @@ mod test {
 #[test]
 fn test_empty() {
 let schema = Schema::builder().build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);
 let reader = index.reader().unwrap();
 let searcher = reader.searcher();
 let searcher_space_usage = searcher.space_usage().unwrap();
@@ -325,7 +325,7 @@ mod test {
 let mut schema_builder = Schema::builder();
 let name = schema_builder.add_u64_field("name", FAST | INDEXED);
 let schema = schema_builder.build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 {
 let mut index_writer = index.writer_for_tests().unwrap();

@@ -362,7 +362,7 @@ mod test {
 let mut schema_builder = Schema::builder();
 let name = schema_builder.add_text_field("name", TEXT);
 let schema = schema_builder.build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 {
 let mut index_writer = index.writer_for_tests().unwrap();

@@ -401,7 +401,7 @@ mod test {
 let mut schema_builder = Schema::builder();
 let name = schema_builder.add_text_field("name", STORED);
 let schema = schema_builder.build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 {
 let mut index_writer = index.writer_for_tests().unwrap();

@@ -439,7 +439,7 @@ mod test {
 let mut schema_builder = Schema::builder();
 let name = schema_builder.add_u64_field("name", INDEXED);
 let schema = schema_builder.build();
-let index = Index::create_in_ram(schema.clone());
+let index = Index::create_in_ram(schema);

 {
 let mut index_writer = index.writer_for_tests()?;
@@ -134,9 +134,7 @@ mod tests {
 let index = Index::create_in_ram(schema);
 let mut index_writer = index.writer_for_tests()?;
 index_writer.set_merge_policy(Box::new(NoMergePolicy));
-let long_text: String = iter::repeat("abcdefghijklmnopqrstuvwxyz")
-.take(1_000)
-.collect();
+let long_text: String = "abcdefghijklmnopqrstuvwxyz".repeat(1_000);
 for _ in 0..20 {
 index_writer.add_document(doc!(body=>long_text.clone()));
 }
@@ -218,9 +216,7 @@ mod tests {
 target: DocId,
 ) -> Option<Checkpoint> {
 checkpoints
-.into_iter()
-.filter(|checkpoint| checkpoint.doc_range.end > target)
-.next()
+.into_iter().find(|checkpoint| checkpoint.doc_range.end > target)
 }

 fn test_skip_index_aux(skip_index: SkipIndex, checkpoints: &[Checkpoint]) {
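Reviewer note: collapsing `.filter(…).next()` into `.find(…)` is `clippy::filter_next`; `find` is the single-step version of the same query and stops at the first match. Sketch with a stand-in Checkpoint type:

    // Hypothetical stand-in for the crate's Checkpoint, just for illustration.
    #[derive(Debug, Clone)]
    struct Checkpoint {
        doc_range: std::ops::Range<u32>,
    }

    fn first_covering(checkpoints: Vec<Checkpoint>, target: u32) -> Option<Checkpoint> {
        // Instead of .filter(|c| c.doc_range.end > target).next():
        checkpoints
            .into_iter()
            .find(|checkpoint| checkpoint.doc_range.end > target)
    }

    fn main() {
        let checkpoints = vec![
            Checkpoint { doc_range: 0..10 },
            Checkpoint { doc_range: 10..20 },
        ];
        println!("{:?}", first_covering(checkpoints, 12)); // second checkpoint
    }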
@@ -64,7 +64,7 @@ impl StoreWriter {
 pub fn store_bytes(&mut self, serialized_document: &[u8]) -> io::Result<()> {
 let doc_num_bytes = serialized_document.len();
 VInt(doc_num_bytes as u64).serialize(&mut self.current_block)?;
-self.current_block.write_all(&serialized_document)?;
+self.current_block.write_all(serialized_document)?;
 self.doc += 1;
 if self.current_block.len() > BLOCK_SIZE {
 self.write_and_compress_block()?;

@@ -74,7 +74,7 @@ fn test_term_dictionary_simple() -> crate::Result<()> {
 {
 {
 let (k, v) = stream.next().unwrap();
-assert_eq!(k.as_ref(), "abc".as_bytes());
+assert_eq!(k, "abc".as_bytes());
 assert_eq!(v.doc_freq, 34u32);
 }
 assert_eq!(stream.key(), "abc".as_bytes());

@@ -114,7 +114,7 @@ fn test_term_dictionary_stream() -> crate::Result<()> {
 let mut i = 0;
 while let Some((streamer_k, streamer_v)) = streamer.next() {
 let &(ref key, ref v) = &ids[i];
-assert_eq!(streamer_k.as_ref(), key.as_bytes());
+assert_eq!(streamer_k, key.as_bytes());
 assert_eq!(streamer_v, &make_term_info(*v as u64));
 i += 1;
 }
@@ -182,7 +182,7 @@ fn test_stream_range() -> crate::Result<()> {
 for j in 0..3 {
 let (streamer_k, streamer_v) = streamer.next().unwrap();
 let &(ref key, ref v) = &ids[i + j];
-assert_eq!(str::from_utf8(streamer_k.as_ref()).unwrap(), key);
+assert_eq!(str::from_utf8(streamer_k).unwrap(), key);
 assert_eq!(streamer_v.doc_freq, *v);
 assert_eq!(streamer_v, &make_term_info(*v as u64));
 }

@@ -199,7 +199,7 @@ fn test_stream_range() -> crate::Result<()> {
 for j in 0..3 {
 let (streamer_k, streamer_v) = streamer.next().unwrap();
 let &(ref key, ref v) = &ids[i + j + 1];
-assert_eq!(streamer_k.as_ref(), key.as_bytes());
+assert_eq!(streamer_k, key.as_bytes());
 assert_eq!(streamer_v.doc_freq, *v);
 }
 }

@@ -230,10 +230,10 @@ fn test_empty_string() -> crate::Result<()> {
 let buffer: Vec<u8> = {
 let mut term_dictionary_builder = TermDictionaryBuilder::create(vec![]).unwrap();
 term_dictionary_builder
-.insert(&[], &make_term_info(1 as u64))
+.insert(&[], &make_term_info(1_u64))
 .unwrap();
 term_dictionary_builder
-.insert(&[1u8], &make_term_info(2 as u64))
+.insert(&[1u8], &make_term_info(2_u64))
 .unwrap();
 term_dictionary_builder.finish()?
 };
@@ -266,7 +266,7 @@ fn test_stream_range_boundaries_forward() -> crate::Result<()> {
 let term_dictionary = stream_range_test_dict()?;
 let value_list = |mut streamer: TermStreamer<'_>| {
 let mut res: Vec<u32> = vec![];
-while let Some((_, ref v)) = streamer.next() {
+while let Some((_, v)) = streamer.next() {
 res.push(v.doc_freq);
 }
 res

@@ -308,7 +308,7 @@ fn test_stream_range_boundaries_backward() -> crate::Result<()> {
 let term_dictionary = stream_range_test_dict()?;
 let value_list_backward = |mut streamer: TermStreamer<'_>| {
 let mut res: Vec<u32> = vec![];
-while let Some((_, ref v)) = streamer.next() {
+while let Some((_, v)) = streamer.next() {
 res.push(v.doc_freq);
 }
 res.reverse();

@@ -393,7 +393,7 @@ fn test_automaton_search() -> crate::Result<()> {
 use crate::query::DfaWrapper;
 use levenshtein_automata::LevenshteinAutomatonBuilder;

-const COUNTRIES: [&'static str; 7] = [
+const COUNTRIES: [&str; 7] = [
 "San Marino",
 "Serbia",
 "Slovakia",
@@ -4036,7 +4036,7 @@ mod tests {
 for (characters, folded) in foldings {
 for &c in characters {
 assert_eq!(
-folding_using_raw_tokenizer_helper(&c),
+folding_using_raw_tokenizer_helper(c),
 folded,
 "testing that character \"{}\" becomes \"{}\"",
 c,

@@ -1,4 +1,4 @@
-use fail;

 use std::path::Path;
 use tantivy::directory::{Directory, ManagedDirectory, RamDirectory, TerminatingWrite};
 use tantivy::doc;