added a bench to measure the perf of indexing logs (#1275)

2025-12-23 02:29:57 +00:00 · 2022-02-18 16:48:29 +09:00
parent 505e6a440c
commit 850b9eaea4
3 changed files with 100079 additions and 0 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,6 +77,11 @@ opt-level = 3
 debug = false
 debug-assertions = false

+[profile.bench]
+opt-level = 3
+debug = true
+debug-assertions = false
+
 [profile.test]
 debug-assertions = true
 overflow-checks = true
@@ -110,3 +115,7 @@ required-features = ["fail/failpoints"]
 [[bench]]
 name = "analyzer"
 harness = false
+
+[[bench]]
+name = "index-bench"
+harness = false
--- a/benches/hdfs.json
+++ b/benches/hdfs.json
--- a/benches/index-bench.rs
+++ b/benches/index-bench.rs
@@ -0,0 +1,70 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use tantivy::schema::{INDEXED, STORED, STRING, TEXT};
+use tantivy::Index;
+
+const HDFS_LOGS: &str = include_str!("hdfs.json");
+
+/// Indexes every line of `HDFS_LOGS` into a brand-new in-RAM index.
+///
+/// The index is created from `schema`, and each line is parsed as a JSON
+/// document against that same `schema`, so the parsed fields always belong to
+/// the index receiving them. A single indexing thread is used, with
+/// `100_000_000` passed as the writer's memory budget. When `commit` is true
+/// the writer is committed once all documents have been added.
+///
+/// # Panics
+///
+/// Panics if the writer cannot be created, if a line does not parse against
+/// `schema`, or if adding a document or committing fails.
+fn index_hdfs_logs(schema: &tantivy::schema::Schema, commit: bool) {
+    let index = Index::create_in_ram(schema.clone());
+    let mut index_writer = index.writer_with_num_threads(1, 100_000_000).unwrap();
+    for doc_json in HDFS_LOGS.trim().lines() {
+        let doc = schema.parse_document(doc_json).unwrap();
+        index_writer.add_document(doc).unwrap();
+    }
+    if commit {
+        index_writer.commit().unwrap();
+    }
+}
+
+/// Registers the `index-hdfs` benchmark group: indexing `HDFS_LOGS` with and
+/// without a final commit, for a schema without and with `STORED` fields.
+pub fn criterion_benchmark(c: &mut Criterion) {
+    // Indexed-only variant: no field carries the `STORED` flag.
+    let schema = {
+        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+        schema_builder.add_u64_field("timestamp", INDEXED);
+        schema_builder.add_text_field("body", TEXT);
+        schema_builder.add_text_field("severity", STRING);
+        schema_builder.build()
+    };
+    // Same three fields, each additionally flagged `STORED`.
+    let schema_with_store = {
+        let mut schema_builder = tantivy::schema::SchemaBuilder::new();
+        schema_builder.add_u64_field("timestamp", INDEXED | STORED);
+        schema_builder.add_text_field("body", TEXT | STORED);
+        schema_builder.add_text_field("severity", STRING | STORED);
+        schema_builder.build()
+    };
+
+    let mut group = c.benchmark_group("index-hdfs");
+    group.sample_size(20);
+    group.bench_function("index-hdfs-no-commit", |b| {
+        b.iter(|| index_hdfs_logs(&schema, false))
+    });
+    group.bench_function("index-hdfs-with-commit", |b| {
+        b.iter(|| index_hdfs_logs(&schema, true))
+    });
+    group.bench_function("index-hdfs-no-commit-with-docstore", |b| {
+        b.iter(|| index_hdfs_logs(&schema_with_store, false))
+    });
+    group.bench_function("index-hdfs-with-commit-with-docstore", |b| {
+        b.iter(|| index_hdfs_logs(&schema_with_store, true))
+    });
+    // Finish the group explicitly instead of relying on its drop.
+    group.finish();
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);