mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
* Adapt tantivy so that it can be plugged into quickwit. * Apply suggestions from code review Co-authored-by: PSeitz <PSeitz@users.noreply.github.com> * Add unit test --------- Co-authored-by: PSeitz <PSeitz@users.noreply.github.com>
137 lines
5.2 KiB
Rust
137 lines
5.2 KiB
Rust
// Sub-modules of the indexer. `pub mod` items are part of the public API;
// plain `mod` items are internal to this crate.
pub mod delete_queue;
pub mod doc_id_mapping;
mod doc_opstamp_mapping;
mod flat_map_with_buffer;
pub mod index_writer;
mod index_writer_status;
mod log_merge_policy;
mod merge_operation;
pub mod merge_policy;
pub mod merger;
mod merger_sorted_index_test;
pub mod operation;
pub mod prepared_commit;
mod segment_entry;
mod segment_manager;
mod segment_register;
pub mod segment_serializer;
pub mod segment_updater;
mod segment_writer;
mod stamper;

// External crates used by this module.
use crossbeam_channel as channel;
use smallvec::SmallVec;

// Re-exports: flatten the most commonly used indexer types into
// `crate::indexer` so callers do not need to know the sub-module layout.
pub use self::index_writer::IndexWriter;
pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_operation::MergeOperation;
pub use self::merge_policy::{MergeCandidate, MergePolicy, NoMergePolicy};
pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::SegmentEntry;
pub use self::segment_manager::SegmentManager;
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_updater::{merge_filtered_segments, merge_indices};
pub use self::segment_writer::SegmentWriter;

use crate::indexer::operation::AddOperation;
|
|
|
|
/// Alias for the default merge policy, which is the `LogMergePolicy`.
pub type DefaultMergePolicy = LogMergePolicy;

// Batch of documents.
//
// Most of the time, users will send operations one-by-one, but it can be
// useful to send them as a small block to ensure that:
// - all docs in the operation will happen on the same segment and
//   continuous doc_ids;
// - all operations in the group are committed at the same time, making the
//   group atomic.
//
// NOTE(review): the inline capacity of 4 presumably matches the typical
// batch size so small batches stay off the heap — confirm before changing.
type AddBatch = SmallVec<[AddOperation; 4]>;

// Sending end of a channel transporting `AddBatch`es.
type AddBatchSender = channel::Sender<AddBatch>;

// Receiving end of a channel transporting `AddBatch`es.
type AddBatchReceiver = channel::Receiver<AddBatch>;
|
|
|
|
#[cfg(feature = "mmap")]
#[cfg(test)]
mod tests_mmap {

    use crate::collector::Count;
    use crate::query::QueryParser;
    use crate::schema::{JsonObjectOptions, Schema, TEXT};
    use crate::{Index, Term};

    /// Regression test: a segment containing a deleted document followed by
    /// enough live documents used to trigger a bug when advancing over the
    /// deletes during commit.
    #[test]
    fn test_advance_delete_bug() -> crate::Result<()> {
        let mut builder = Schema::builder();
        let text = builder.add_text_field("text", TEXT);
        let index = Index::create_from_tempdir(builder.build())?;
        let mut writer = index.writer_for_tests()?;
        // The segment must contain one deleted document.
        writer.add_document(doc!(text=>"b"))?;
        writer.delete_term(Term::from_field_text(text, "b"));
        // Enough data is required to trigger the bug (at least 32 documents).
        for _ in 0..32 {
            writer.add_document(doc!(text=>"c"))?;
        }
        writer.commit()?;
        Ok(())
    }

    /// With dot-expansion disabled, a literal dot inside a JSON key must be
    /// escaped in the query; the unescaped form matches nothing.
    #[test]
    fn test_json_field_expand_dots_disabled_dot_escaped_required() {
        let mut builder = Schema::builder();
        let json_field = builder.add_json_field("json", TEXT);
        let index = Index::create_in_ram(builder.build());

        let mut writer = index.writer_for_tests().unwrap();
        let doc_json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
        writer.add_document(doc!(json_field=>doc_json)).unwrap();
        writer.commit().unwrap();

        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        assert_eq!(searcher.num_docs(), 1);

        let query_parser = QueryParser::for_index(&index, Vec::new());
        // Parse the query string and count the matching documents.
        let count_for = |query_str: &str| {
            let query = query_parser.parse_query(query_str).unwrap();
            searcher.search(&query, &Count).unwrap()
        };
        // Escaped dots address the literal key `k8s.container.name`.
        assert_eq!(count_for(r#"json.k8s\.container\.name:prometheus"#), 1);
        // Unescaped dots do not reach the literal key, so nothing matches.
        assert_eq!(count_for(r#"json.k8s.container.name:prometheus"#), 0);
    }

    /// With dot-expansion enabled, both the escaped and the unescaped form
    /// of a dotted JSON key match the document.
    #[test]
    fn test_json_field_expand_dots_enabled_dot_escape_not_required() {
        let mut builder = Schema::builder();
        let json_options: JsonObjectOptions =
            JsonObjectOptions::from(TEXT).set_expand_dots_enabled();
        let json_field = builder.add_json_field("json", json_options);
        let index = Index::create_in_ram(builder.build());

        let mut writer = index.writer_for_tests().unwrap();
        let doc_json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
        writer.add_document(doc!(json_field=>doc_json)).unwrap();
        writer.commit().unwrap();

        let reader = index.reader().unwrap();
        let searcher = reader.searcher();
        assert_eq!(searcher.num_docs(), 1);

        let query_parser = QueryParser::for_index(&index, Vec::new());
        // Parse the query string and count the matching documents.
        let count_for = |query_str: &str| {
            let query = query_parser.parse_query(query_str).unwrap();
            searcher.search(&query, &Count).unwrap()
        };
        // Both spellings resolve to the same key when dots are expanded.
        assert_eq!(count_for(r#"json.k8s.container.name:prometheus"#), 1);
        assert_eq!(count_for(r#"json.k8s\.container\.name:prometheus"#), 1);
    }
}
|