mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-28 04:52:55 +00:00
Compare commits
2 Commits
trinity.po
...
fmassot--b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
951a898633 | ||
|
|
003722d831 |
100000
benches/hdfs_with_array.json
Normal file
100000
benches/hdfs_with_array.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,116 +1,157 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use itertools::Itertools;
|
||||
use pprof::criterion::{Output, PProfProfiler};
|
||||
use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
|
||||
use tantivy::Index;
|
||||
use serde_json::{self, Value as JsonValue};
|
||||
use tantivy::directory::RamDirectory;
|
||||
use tantivy::schema::{INDEXED, STORED, STRING, TEXT, TextOptions, TextFieldIndexing, FieldValue, Value};
|
||||
use tantivy::{Index, IndexBuilder, Document};
|
||||
|
||||
const HDFS_LOGS: &str = include_str!("hdfs.json");
|
||||
const NUM_REPEATS: usize = 2;
|
||||
|
||||
pub fn hdfs_index_benchmark(c: &mut Criterion) {
|
||||
let schema = {
|
||||
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
|
||||
schema_builder.add_u64_field("timestamp", INDEXED);
|
||||
schema_builder.add_text_field("body", TEXT);
|
||||
schema_builder.add_text_field("severity", STRING);
|
||||
schema_builder.build()
|
||||
};
|
||||
let schema_with_store = {
|
||||
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
|
||||
schema_builder.add_u64_field("timestamp", INDEXED | STORED);
|
||||
schema_builder.add_text_field("body", TEXT | STORED);
|
||||
schema_builder.add_text_field("severity", STRING | STORED);
|
||||
schema_builder.build()
|
||||
};
|
||||
let dynamic_schema = {
|
||||
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
|
||||
schema_builder.add_json_field("json", TEXT);
|
||||
schema_builder.build()
|
||||
};
|
||||
let mut schema_builder = tantivy::schema::SchemaBuilder::new();
|
||||
let text_indexing_options = TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_fieldnorms(false)
|
||||
.set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
|
||||
let mut text_options = TextOptions::default()
|
||||
.set_indexing_options(text_indexing_options);
|
||||
let text_field = schema_builder.add_text_field("body", text_options);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
// prepare doc
|
||||
let mut documents_no_array = Vec::new();
|
||||
let mut documents_with_array = Vec::new();
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let json_obj: serde_json::Map<String, JsonValue> = serde_json::from_str(doc_json).unwrap();
|
||||
let text = json_obj.get("body").unwrap().as_str().unwrap();
|
||||
let text_array = text.split(" ");
|
||||
let mut doc_no_array = Document::new();
|
||||
doc_no_array.add_field_value(text_field, Value::Str(text.to_string()));
|
||||
documents_no_array.push(doc_no_array);
|
||||
let mut doc_with_array = Document::new();
|
||||
for text_element in text_array {
|
||||
doc_with_array.add_field_value(text_field, Value::Str(text_element.to_string()));
|
||||
}
|
||||
documents_with_array.push(doc_with_array);
|
||||
}
|
||||
|
||||
let mut group = c.benchmark_group("index-hdfs");
|
||||
group.sample_size(20);
|
||||
group.bench_function("index-hdfs-no-commit", |b| {
|
||||
b.iter(|| {
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
||||
let ram_directory = RamDirectory::create();
|
||||
let mut index_writer = IndexBuilder::new()
|
||||
.schema(schema.clone())
|
||||
.single_segment_index_writer(ram_directory, 100_000_000)
|
||||
.unwrap();
|
||||
for _ in 0..NUM_REPEATS {
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let doc = schema.parse_document(doc_json).unwrap();
|
||||
let documents_cloned = documents_no_array.clone();
|
||||
for doc in documents_cloned {
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
group.bench_function("index-hdfs-with-commit", |b| {
|
||||
group.bench_function("index-hdfs-with-array-no-commit", |b| {
|
||||
b.iter(|| {
|
||||
let index = Index::create_in_ram(schema.clone());
|
||||
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
||||
let ram_directory = RamDirectory::create();
|
||||
let mut index_writer = IndexBuilder::new()
|
||||
.schema(schema.clone())
|
||||
.single_segment_index_writer(ram_directory, 100_000_000)
|
||||
.unwrap();
|
||||
for _ in 0..NUM_REPEATS {
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let doc = schema.parse_document(doc_json).unwrap();
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
})
|
||||
});
|
||||
group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
|
||||
b.iter(|| {
|
||||
let index = Index::create_in_ram(schema_with_store.clone());
|
||||
let index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
||||
for _ in 0..NUM_REPEATS {
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let doc = schema.parse_document(doc_json).unwrap();
|
||||
let documents_with_array_cloned = documents_with_array.clone();
|
||||
for doc in documents_with_array_cloned {
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
|
||||
b.iter(|| {
|
||||
let index = Index::create_in_ram(schema_with_store.clone());
|
||||
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
||||
for _ in 0..NUM_REPEATS {
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let doc = schema.parse_document(doc_json).unwrap();
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
})
|
||||
});
|
||||
group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
|
||||
b.iter(|| {
|
||||
let index = Index::create_in_ram(dynamic_schema.clone());
|
||||
let json_field = dynamic_schema.get_field("json").unwrap();
|
||||
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
||||
for _ in 0..NUM_REPEATS {
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let json_val: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_str(doc_json).unwrap();
|
||||
let doc = tantivy::doc!(json_field=>json_val);
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
})
|
||||
});
|
||||
group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
|
||||
b.iter(|| {
|
||||
let index = Index::create_in_ram(dynamic_schema.clone());
|
||||
let json_field = dynamic_schema.get_field("json").unwrap();
|
||||
let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
|
||||
for _ in 0..NUM_REPEATS {
|
||||
for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
let json_val: serde_json::Map<String, serde_json::Value> =
|
||||
serde_json::from_str(doc_json).unwrap();
|
||||
let doc = tantivy::doc!(json_field=>json_val);
|
||||
index_writer.add_document(doc).unwrap();
|
||||
}
|
||||
}
|
||||
index_writer.commit().unwrap();
|
||||
})
|
||||
});
|
||||
// group.bench_function("index-hdfs-with-commit", |b| {
|
||||
// b.iter(|| {
|
||||
// let ram_directory = RamDirectory::create();
|
||||
// let mut index_writer = IndexBuilder::new()
|
||||
// .schema(schema.clone())
|
||||
// .single_segment_index_writer(ram_directory, 100_000_000)
|
||||
// .unwrap();
|
||||
// for _ in 0..NUM_REPEATS {
|
||||
// for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
// let doc = schema.parse_document(doc_json).unwrap();
|
||||
// index_writer.add_document(doc).unwrap();
|
||||
// }
|
||||
// }
|
||||
// index_writer.commit().unwrap();
|
||||
// })
|
||||
// });
|
||||
// group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
|
||||
// b.iter(|| {
|
||||
// let ram_directory = RamDirectory::create();
|
||||
// let mut index_writer = IndexBuilder::new()
|
||||
// .schema(schema.clone())
|
||||
// .single_segment_index_writer(ram_directory, 100_000_000)
|
||||
// .unwrap();
|
||||
// for _ in 0..NUM_REPEATS {
|
||||
// for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
// let doc = schema.parse_document(doc_json).unwrap();
|
||||
// index_writer.add_document(doc).unwrap();
|
||||
// }
|
||||
// }
|
||||
// })
|
||||
// });
|
||||
// group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
|
||||
// b.iter(|| {
|
||||
// let ram_directory = RamDirectory::create();
|
||||
// let mut index_writer = IndexBuilder::new()
|
||||
// .schema(schema.clone())
|
||||
// .single_segment_index_writer(ram_directory, 100_000_000)
|
||||
// .unwrap();
|
||||
// for _ in 0..NUM_REPEATS {
|
||||
// for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
// let doc = schema.parse_document(doc_json).unwrap();
|
||||
// index_writer.add_document(doc).unwrap();
|
||||
// }
|
||||
// }
|
||||
// index_writer.commit().unwrap();
|
||||
// })
|
||||
// });
|
||||
// group.bench_function("index-hdfs-no-commit-json-without-docstore", |b| {
|
||||
// b.iter(|| {
|
||||
// let ram_directory = RamDirectory::create();
|
||||
// let mut index_writer = IndexBuilder::new()
|
||||
// .schema(schema.clone())
|
||||
// .single_segment_index_writer(ram_directory, 100_000_000)
|
||||
// .unwrap();
|
||||
// for _ in 0..NUM_REPEATS {
|
||||
// for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
// let json_val: serde_json::Map<String, serde_json::Value> =
|
||||
// serde_json::from_str(doc_json).unwrap();
|
||||
// let doc = tantivy::doc!(json_field=>json_val);
|
||||
// index_writer.add_document(doc).unwrap();
|
||||
// }
|
||||
// }
|
||||
// index_writer.commit().unwrap();
|
||||
// })
|
||||
// });
|
||||
// group.bench_function("index-hdfs-with-commit-json-without-docstore", |b| {
|
||||
// b.iter(|| {
|
||||
// let ram_directory = RamDirectory::create();
|
||||
// let mut index_writer = IndexBuilder::new()
|
||||
// .schema(schema.clone())
|
||||
// .single_segment_index_writer(ram_directory, 100_000_000)
|
||||
// .unwrap();
|
||||
// for _ in 0..NUM_REPEATS {
|
||||
// for doc_json in HDFS_LOGS.trim().split("\n") {
|
||||
// let json_val: serde_json::Map<String, serde_json::Value> =
|
||||
// serde_json::from_str(doc_json).unwrap();
|
||||
// let doc = tantivy::doc!(json_field=>json_val);
|
||||
// index_writer.add_document(doc).unwrap();
|
||||
// }
|
||||
// }
|
||||
// index_writer.commit().unwrap();
|
||||
// })
|
||||
//});
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
|
||||
Reference in New Issue
Block a user