diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml
index 5a9a117fe2..8a93e20b57 100644
--- a/src/mito2/Cargo.toml
+++ b/src/mito2/Cargo.toml
@@ -79,5 +79,6 @@ rand.workspace = true
toml.workspace = true
[[bench]]
-name = "bench_merge_tree"
+name = "memtable_bench"
harness = false
+required-features = ["test"]
diff --git a/src/mito2/README.md b/src/mito2/README.md
index ae58d6d3d2..1e2accd939 100644
--- a/src/mito2/README.md
+++ b/src/mito2/README.md
@@ -7,3 +7,9 @@ The Alfa Romeo [MiTo](https://en.wikipedia.org/wiki/Alfa_Romeo_MiTo) is a front-
> "You can't be a true petrolhead until you've owned an Alfa Romeo."
>
-- by Jeremy Clarkson
+
+## Benchmarks
+Run benchmarks in this crate:
+```bash
+cargo bench -p mito2 -F test
+```
diff --git a/src/mito2/benches/bench_merge_tree.rs b/src/mito2/benches/bench_merge_tree.rs
deleted file mode 100644
index febc648dae..0000000000
--- a/src/mito2/benches/bench_merge_tree.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-mod merge_tree_bench;
-
-use criterion::criterion_main;
-
-criterion_main! {
- merge_tree_bench::benches
-}
diff --git a/src/mito2/benches/memtable_bench.rs b/src/mito2/benches/memtable_bench.rs
new file mode 100644
index 0000000000..30701fe2ba
--- /dev/null
+++ b/src/mito2/benches/memtable_bench.rs
@@ -0,0 +1,352 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use api::v1::value::ValueData;
+use api::v1::{Row, Rows, SemanticType};
+use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion_common::Column;
+use datafusion_expr::{lit, Expr};
+use datatypes::data_type::ConcreteDataType;
+use datatypes::schema::ColumnSchema;
+use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
+use mito2::memtable::time_series::TimeSeriesMemtable;
+use mito2::memtable::{KeyValues, Memtable};
+use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
+use rand::rngs::ThreadRng;
+use rand::seq::SliceRandom;
+use rand::Rng;
+use store_api::metadata::{
+ ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
+};
+use store_api::storage::RegionId;
+use table::predicate::Predicate;
+
+/// Writes rows.
+fn write_rows(c: &mut Criterion) {
+ let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
+ let timestamps = (0..100).collect::>();
+
+ // Note that this test only generate one time series.
+ let mut group = c.benchmark_group("write");
+ group.bench_function("merge_tree", |b| {
+ let memtable =
+ MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
+ let kvs =
+ memtable_util::build_key_values(&metadata, "hello".to_string(), 42, ×tamps, 1);
+ b.iter(|| {
+ memtable.write(&kvs).unwrap();
+ });
+ });
+ group.bench_function("time_series", |b| {
+ let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
+ let kvs =
+ memtable_util::build_key_values(&metadata, "hello".to_string(), 42, ×tamps, 1);
+ b.iter(|| {
+ memtable.write(&kvs).unwrap();
+ });
+ });
+}
+
+/// Scans all rows.
+fn full_scan(c: &mut Criterion) {
+ let metadata = Arc::new(cpu_metadata());
+ let config = MergeTreeConfig::default();
+ let start_sec = 1710043200;
+ let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);
+
+ let mut group = c.benchmark_group("full_scan");
+ group.sample_size(10);
+ group.bench_function("merge_tree", |b| {
+ let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
+ for kvs in generator.iter() {
+ memtable.write(&kvs).unwrap();
+ }
+
+ b.iter(|| {
+ let iter = memtable.iter(None, None).unwrap();
+ for batch in iter {
+ let _batch = batch.unwrap();
+ }
+ });
+ });
+ group.bench_function("time_series", |b| {
+ let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
+ for kvs in generator.iter() {
+ memtable.write(&kvs).unwrap();
+ }
+
+ b.iter(|| {
+ let iter = memtable.iter(None, None).unwrap();
+ for batch in iter {
+ let _batch = batch.unwrap();
+ }
+ });
+ });
+}
+
+/// Filters 1 host.
+fn filter_1_host(c: &mut Criterion) {
+ let metadata = Arc::new(cpu_metadata());
+ let config = MergeTreeConfig::default();
+ let start_sec = 1710043200;
+ let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);
+
+ let mut group = c.benchmark_group("filter_1_host");
+ group.sample_size(10);
+ group.bench_function("merge_tree", |b| {
+ let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
+ for kvs in generator.iter() {
+ memtable.write(&kvs).unwrap();
+ }
+ let predicate = generator.random_host_filter();
+
+ b.iter(|| {
+ let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
+ for batch in iter {
+ let _batch = batch.unwrap();
+ }
+ });
+ });
+ group.bench_function("time_series", |b| {
+ let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
+ for kvs in generator.iter() {
+ memtable.write(&kvs).unwrap();
+ }
+ let predicate = generator.random_host_filter();
+
+ b.iter(|| {
+ let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
+ for batch in iter {
+ let _batch = batch.unwrap();
+ }
+ });
+ });
+}
+
+struct Host {
+ hostname: String,
+ region: String,
+ datacenter: String,
+ rack: String,
+ os: String,
+ arch: String,
+ team: String,
+ service: String,
+ service_version: String,
+ service_environment: String,
+}
+
+impl Host {
+ fn random_with_id(id: usize) -> Host {
+ let mut rng = rand::thread_rng();
+ let region = format!("ap-southeast-{}", rng.gen_range(0..10));
+ let datacenter = format!(
+ "{}{}",
+ region,
+ ['a', 'b', 'c', 'd', 'e'].choose(&mut rng).unwrap()
+ );
+ Host {
+ hostname: format!("host_{id}"),
+ region,
+ datacenter,
+ rack: rng.gen_range(0..100).to_string(),
+ os: "Ubuntu16.04LTS".to_string(),
+ arch: "x86".to_string(),
+ team: "CHI".to_string(),
+ service: rng.gen_range(0..100).to_string(),
+ service_version: rng.gen_range(0..10).to_string(),
+ service_environment: "test".to_string(),
+ }
+ }
+
+ fn fill_values(&self, values: &mut Vec) {
+ let tags = [
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.hostname.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.region.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.datacenter.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.rack.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.os.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.arch.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.team.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.service.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.service_version.clone())),
+ },
+ api::v1::Value {
+ value_data: Some(ValueData::StringValue(self.service_environment.clone())),
+ },
+ ];
+ for tag in tags {
+ values.push(tag);
+ }
+ }
+}
+
+struct CpuDataGenerator {
+ metadata: RegionMetadataRef,
+ column_schemas: Vec,
+ hosts: Vec,
+ start_sec: i64,
+ end_sec: i64,
+}
+
+impl CpuDataGenerator {
+ fn new(metadata: RegionMetadataRef, num_hosts: usize, start_sec: i64, end_sec: i64) -> Self {
+ let column_schemas = region_metadata_to_row_schema(&metadata);
+ Self {
+ metadata,
+ column_schemas,
+ hosts: Self::generate_hosts(num_hosts),
+ start_sec,
+ end_sec,
+ }
+ }
+
+ fn iter(&self) -> impl Iterator- + '_ {
+ // point per 10s.
+ (self.start_sec..self.end_sec)
+ .step_by(10)
+ .enumerate()
+ .map(|(seq, ts)| self.build_key_values(seq, ts))
+ }
+
+ fn build_key_values(&self, seq: usize, current_sec: i64) -> KeyValues {
+ let rows = self
+ .hosts
+ .iter()
+ .map(|host| {
+ let mut rng = rand::thread_rng();
+ let mut values = Vec::with_capacity(21);
+ values.push(api::v1::Value {
+ value_data: Some(ValueData::TimestampMillisecondValue(current_sec * 1000)),
+ });
+ host.fill_values(&mut values);
+ for _ in 0..10 {
+ values.push(api::v1::Value {
+ value_data: Some(ValueData::F64Value(Self::random_f64(&mut rng))),
+ });
+ }
+ Row { values }
+ })
+ .collect();
+ let mutation = api::v1::Mutation {
+ op_type: api::v1::OpType::Put as i32,
+ sequence: seq as u64,
+ rows: Some(Rows {
+ schema: self.column_schemas.clone(),
+ rows,
+ }),
+ };
+
+ KeyValues::new(&self.metadata, mutation).unwrap()
+ }
+
+ fn random_host_filter(&self) -> Predicate {
+ let host = self.random_hostname();
+ let expr = Expr::Column(Column::from_name("hostname")).eq(lit(host));
+ Predicate::new(vec![expr.into()])
+ }
+
+ fn random_hostname(&self) -> String {
+ let mut rng = rand::thread_rng();
+ self.hosts.choose(&mut rng).unwrap().hostname.clone()
+ }
+
+ fn random_f64(rng: &mut ThreadRng) -> f64 {
+ let base: u32 = rng.gen_range(30..95);
+ base as f64
+ }
+
+ fn generate_hosts(num_hosts: usize) -> Vec {
+ (0..num_hosts).map(Host::random_with_id).collect()
+ }
+}
+
+/// Creates a metadata for TSBS cpu-like table.
+fn cpu_metadata() -> RegionMetadata {
+ let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
+ builder.push_column_metadata(ColumnMetadata {
+ column_schema: ColumnSchema::new(
+ "ts",
+ ConcreteDataType::timestamp_millisecond_datatype(),
+ false,
+ ),
+ semantic_type: SemanticType::Timestamp,
+ column_id: 0,
+ });
+ let mut column_id = 1;
+ let tags = [
+ "hostname",
+ "region",
+ "datacenter",
+ "rack",
+ "os",
+ "arch",
+ "team",
+ "service",
+ "service_version",
+ "service_environment",
+ ];
+ for tag in tags {
+ builder.push_column_metadata(ColumnMetadata {
+ column_schema: ColumnSchema::new(tag, ConcreteDataType::string_datatype(), true),
+ semantic_type: SemanticType::Tag,
+ column_id,
+ });
+ column_id += 1;
+ }
+ let fields = [
+ "usage_user",
+ "usage_system",
+ "usage_idle",
+ "usage_nice",
+ "usage_iowait",
+ "usage_irq",
+ "usage_softirq",
+ "usage_steal",
+ "usage_guest",
+ "usage_guest_nice",
+ ];
+ for field in fields {
+ builder.push_column_metadata(ColumnMetadata {
+ column_schema: ColumnSchema::new(field, ConcreteDataType::float64_datatype(), true),
+ semantic_type: SemanticType::Field,
+ column_id,
+ });
+ column_id += 1;
+ }
+ builder.primary_key(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+ builder.build().unwrap()
+}
+
+criterion_group!(benches, write_rows, full_scan, filter_1_host);
+criterion_main!(benches);
diff --git a/src/mito2/benches/merge_tree_bench.rs b/src/mito2/benches/merge_tree_bench.rs
deleted file mode 100644
index 3ad86c2d9e..0000000000
--- a/src/mito2/benches/merge_tree_bench.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use criterion::{criterion_group, criterion_main, Criterion};
-use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
-use mito2::memtable::Memtable;
-use mito2::test_util::memtable_util;
-
-fn bench_merge_tree_memtable(c: &mut Criterion) {
- let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
- let timestamps = (0..100).collect::>();
-
- let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
-
- let _ = c.bench_function("MergeTreeMemtable", |b| {
- let kvs =
- memtable_util::build_key_values(&metadata, "hello".to_string(), 42, ×tamps, 1);
- b.iter(|| {
- memtable.write(&kvs).unwrap();
- });
- });
-}
-
-criterion_group!(benches, bench_merge_tree_memtable);
-criterion_main!(benches);
diff --git a/src/mito2/src/test_util/memtable_util.rs b/src/mito2/src/test_util/memtable_util.rs
index 4f3d9cfd3d..38108dff3c 100644
--- a/src/mito2/src/test_util/memtable_util.rs
+++ b/src/mito2/src/test_util/memtable_util.rs
@@ -219,25 +219,14 @@ pub(crate) fn extract_data_batch(batch: &DataBatch) -> (u16, Vec<(i64, u64)>) {
/// Builds key values with timestamps (ms) and sequences for test.
pub(crate) fn build_key_values_with_ts_seq_values(
- schema: &RegionMetadataRef,
+ metadata: &RegionMetadataRef,
k0: String,
k1: u32,
timestamps: impl Iterator
- ,
values: impl Iterator
- >,
sequence: SequenceNumber,
) -> KeyValues {
- let column_schema = schema
- .column_metadatas
- .iter()
- .map(|c| api::v1::ColumnSchema {
- column_name: c.column_schema.name.clone(),
- datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
- .unwrap()
- .datatype() as i32,
- semantic_type: c.semantic_type as i32,
- ..Default::default()
- })
- .collect();
+ let column_schema = region_metadata_to_row_schema(metadata);
let rows = timestamps
.zip(values)
@@ -269,7 +258,23 @@ pub(crate) fn build_key_values_with_ts_seq_values(
rows,
}),
};
- KeyValues::new(schema.as_ref(), mutation).unwrap()
+ KeyValues::new(metadata.as_ref(), mutation).unwrap()
+}
+
+/// Converts the region metadata to column schemas for a row.
+pub fn region_metadata_to_row_schema(metadata: &RegionMetadataRef) -> Vec {
+ metadata
+ .column_metadatas
+ .iter()
+ .map(|c| api::v1::ColumnSchema {
+ column_name: c.column_schema.name.clone(),
+ datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
+ .unwrap()
+ .datatype() as i32,
+ semantic_type: c.semantic_type as i32,
+ ..Default::default()
+ })
+ .collect()
}
/// Encode keys.