mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-09 14:52:58 +00:00
perf: more benchmarks for memtables (#3491)
* chore: remove duplicate bench * refactor: rename bench * perf: add full scan bench for memtable * feat: filter bench and add time series to bench group * chore: comment * refactor: rename * style: fix clippy
This commit is contained in:
@@ -79,5 +79,6 @@ rand.workspace = true
|
||||
toml.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "bench_merge_tree"
|
||||
name = "memtable_bench"
|
||||
harness = false
|
||||
required-features = ["test"]
|
||||
|
||||
@@ -7,3 +7,9 @@ The Alfa Romeo [MiTo](https://en.wikipedia.org/wiki/Alfa_Romeo_MiTo) is a front-
|
||||
|
||||
> "You can't be a true petrolhead until you've owned an Alfa Romeo."
|
||||
> <div align="right">-- by Jeremy Clarkson</div>
|
||||
|
||||
## Benchmarks
|
||||
Run benchmarks in this crate:
|
||||
```bash
|
||||
cargo bench -p mito2 -F test
|
||||
```
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod merge_tree_bench;
|
||||
|
||||
use criterion::criterion_main;
|
||||
|
||||
criterion_main! {
|
||||
merge_tree_bench::benches
|
||||
}
|
||||
352
src/mito2/benches/memtable_bench.rs
Normal file
352
src/mito2/benches/memtable_bench.rs
Normal file
@@ -0,0 +1,352 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{Row, Rows, SemanticType};
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use datafusion_common::Column;
|
||||
use datafusion_expr::{lit, Expr};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
|
||||
use mito2::memtable::time_series::TimeSeriesMemtable;
|
||||
use mito2::memtable::{KeyValues, Memtable};
|
||||
use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
|
||||
use rand::rngs::ThreadRng;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
use store_api::metadata::{
|
||||
ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
use table::predicate::Predicate;
|
||||
|
||||
/// Writes rows.
|
||||
fn write_rows(c: &mut Criterion) {
|
||||
let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
|
||||
let timestamps = (0..100).collect::<Vec<_>>();
|
||||
|
||||
// Note that this test only generate one time series.
|
||||
let mut group = c.benchmark_group("write");
|
||||
group.bench_function("merge_tree", |b| {
|
||||
let memtable =
|
||||
MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
|
||||
let kvs =
|
||||
memtable_util::build_key_values(&metadata, "hello".to_string(), 42, ×tamps, 1);
|
||||
b.iter(|| {
|
||||
memtable.write(&kvs).unwrap();
|
||||
});
|
||||
});
|
||||
group.bench_function("time_series", |b| {
|
||||
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
|
||||
let kvs =
|
||||
memtable_util::build_key_values(&metadata, "hello".to_string(), 42, ×tamps, 1);
|
||||
b.iter(|| {
|
||||
memtable.write(&kvs).unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/// Scans all rows.
|
||||
fn full_scan(c: &mut Criterion) {
|
||||
let metadata = Arc::new(cpu_metadata());
|
||||
let config = MergeTreeConfig::default();
|
||||
let start_sec = 1710043200;
|
||||
let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);
|
||||
|
||||
let mut group = c.benchmark_group("full_scan");
|
||||
group.sample_size(10);
|
||||
group.bench_function("merge_tree", |b| {
|
||||
let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
|
||||
for kvs in generator.iter() {
|
||||
memtable.write(&kvs).unwrap();
|
||||
}
|
||||
|
||||
b.iter(|| {
|
||||
let iter = memtable.iter(None, None).unwrap();
|
||||
for batch in iter {
|
||||
let _batch = batch.unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("time_series", |b| {
|
||||
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
|
||||
for kvs in generator.iter() {
|
||||
memtable.write(&kvs).unwrap();
|
||||
}
|
||||
|
||||
b.iter(|| {
|
||||
let iter = memtable.iter(None, None).unwrap();
|
||||
for batch in iter {
|
||||
let _batch = batch.unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/// Filters 1 host.
|
||||
fn filter_1_host(c: &mut Criterion) {
|
||||
let metadata = Arc::new(cpu_metadata());
|
||||
let config = MergeTreeConfig::default();
|
||||
let start_sec = 1710043200;
|
||||
let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);
|
||||
|
||||
let mut group = c.benchmark_group("filter_1_host");
|
||||
group.sample_size(10);
|
||||
group.bench_function("merge_tree", |b| {
|
||||
let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
|
||||
for kvs in generator.iter() {
|
||||
memtable.write(&kvs).unwrap();
|
||||
}
|
||||
let predicate = generator.random_host_filter();
|
||||
|
||||
b.iter(|| {
|
||||
let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
|
||||
for batch in iter {
|
||||
let _batch = batch.unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("time_series", |b| {
|
||||
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
|
||||
for kvs in generator.iter() {
|
||||
memtable.write(&kvs).unwrap();
|
||||
}
|
||||
let predicate = generator.random_host_filter();
|
||||
|
||||
b.iter(|| {
|
||||
let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
|
||||
for batch in iter {
|
||||
let _batch = batch.unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
struct Host {
|
||||
hostname: String,
|
||||
region: String,
|
||||
datacenter: String,
|
||||
rack: String,
|
||||
os: String,
|
||||
arch: String,
|
||||
team: String,
|
||||
service: String,
|
||||
service_version: String,
|
||||
service_environment: String,
|
||||
}
|
||||
|
||||
impl Host {
|
||||
fn random_with_id(id: usize) -> Host {
|
||||
let mut rng = rand::thread_rng();
|
||||
let region = format!("ap-southeast-{}", rng.gen_range(0..10));
|
||||
let datacenter = format!(
|
||||
"{}{}",
|
||||
region,
|
||||
['a', 'b', 'c', 'd', 'e'].choose(&mut rng).unwrap()
|
||||
);
|
||||
Host {
|
||||
hostname: format!("host_{id}"),
|
||||
region,
|
||||
datacenter,
|
||||
rack: rng.gen_range(0..100).to_string(),
|
||||
os: "Ubuntu16.04LTS".to_string(),
|
||||
arch: "x86".to_string(),
|
||||
team: "CHI".to_string(),
|
||||
service: rng.gen_range(0..100).to_string(),
|
||||
service_version: rng.gen_range(0..10).to_string(),
|
||||
service_environment: "test".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn fill_values(&self, values: &mut Vec<api::v1::Value>) {
|
||||
let tags = [
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.hostname.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.region.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.datacenter.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.rack.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.os.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.arch.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.team.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.service.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.service_version.clone())),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(ValueData::StringValue(self.service_environment.clone())),
|
||||
},
|
||||
];
|
||||
for tag in tags {
|
||||
values.push(tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct CpuDataGenerator {
|
||||
metadata: RegionMetadataRef,
|
||||
column_schemas: Vec<api::v1::ColumnSchema>,
|
||||
hosts: Vec<Host>,
|
||||
start_sec: i64,
|
||||
end_sec: i64,
|
||||
}
|
||||
|
||||
impl CpuDataGenerator {
|
||||
fn new(metadata: RegionMetadataRef, num_hosts: usize, start_sec: i64, end_sec: i64) -> Self {
|
||||
let column_schemas = region_metadata_to_row_schema(&metadata);
|
||||
Self {
|
||||
metadata,
|
||||
column_schemas,
|
||||
hosts: Self::generate_hosts(num_hosts),
|
||||
start_sec,
|
||||
end_sec,
|
||||
}
|
||||
}
|
||||
|
||||
fn iter(&self) -> impl Iterator<Item = KeyValues> + '_ {
|
||||
// point per 10s.
|
||||
(self.start_sec..self.end_sec)
|
||||
.step_by(10)
|
||||
.enumerate()
|
||||
.map(|(seq, ts)| self.build_key_values(seq, ts))
|
||||
}
|
||||
|
||||
fn build_key_values(&self, seq: usize, current_sec: i64) -> KeyValues {
|
||||
let rows = self
|
||||
.hosts
|
||||
.iter()
|
||||
.map(|host| {
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut values = Vec::with_capacity(21);
|
||||
values.push(api::v1::Value {
|
||||
value_data: Some(ValueData::TimestampMillisecondValue(current_sec * 1000)),
|
||||
});
|
||||
host.fill_values(&mut values);
|
||||
for _ in 0..10 {
|
||||
values.push(api::v1::Value {
|
||||
value_data: Some(ValueData::F64Value(Self::random_f64(&mut rng))),
|
||||
});
|
||||
}
|
||||
Row { values }
|
||||
})
|
||||
.collect();
|
||||
let mutation = api::v1::Mutation {
|
||||
op_type: api::v1::OpType::Put as i32,
|
||||
sequence: seq as u64,
|
||||
rows: Some(Rows {
|
||||
schema: self.column_schemas.clone(),
|
||||
rows,
|
||||
}),
|
||||
};
|
||||
|
||||
KeyValues::new(&self.metadata, mutation).unwrap()
|
||||
}
|
||||
|
||||
fn random_host_filter(&self) -> Predicate {
|
||||
let host = self.random_hostname();
|
||||
let expr = Expr::Column(Column::from_name("hostname")).eq(lit(host));
|
||||
Predicate::new(vec![expr.into()])
|
||||
}
|
||||
|
||||
fn random_hostname(&self) -> String {
|
||||
let mut rng = rand::thread_rng();
|
||||
self.hosts.choose(&mut rng).unwrap().hostname.clone()
|
||||
}
|
||||
|
||||
fn random_f64(rng: &mut ThreadRng) -> f64 {
|
||||
let base: u32 = rng.gen_range(30..95);
|
||||
base as f64
|
||||
}
|
||||
|
||||
fn generate_hosts(num_hosts: usize) -> Vec<Host> {
|
||||
(0..num_hosts).map(Host::random_with_id).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a metadata for TSBS cpu-like table.
|
||||
fn cpu_metadata() -> RegionMetadata {
|
||||
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
|
||||
builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 0,
|
||||
});
|
||||
let mut column_id = 1;
|
||||
let tags = [
|
||||
"hostname",
|
||||
"region",
|
||||
"datacenter",
|
||||
"rack",
|
||||
"os",
|
||||
"arch",
|
||||
"team",
|
||||
"service",
|
||||
"service_version",
|
||||
"service_environment",
|
||||
];
|
||||
for tag in tags {
|
||||
builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(tag, ConcreteDataType::string_datatype(), true),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id,
|
||||
});
|
||||
column_id += 1;
|
||||
}
|
||||
let fields = [
|
||||
"usage_user",
|
||||
"usage_system",
|
||||
"usage_idle",
|
||||
"usage_nice",
|
||||
"usage_iowait",
|
||||
"usage_irq",
|
||||
"usage_softirq",
|
||||
"usage_steal",
|
||||
"usage_guest",
|
||||
"usage_guest_nice",
|
||||
];
|
||||
for field in fields {
|
||||
builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(field, ConcreteDataType::float64_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id,
|
||||
});
|
||||
column_id += 1;
|
||||
}
|
||||
builder.primary_key(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
criterion_group!(benches, write_rows, full_scan, filter_1_host);
|
||||
criterion_main!(benches);
|
||||
@@ -1,36 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
|
||||
use mito2::memtable::Memtable;
|
||||
use mito2::test_util::memtable_util;
|
||||
|
||||
fn bench_merge_tree_memtable(c: &mut Criterion) {
|
||||
let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
|
||||
let timestamps = (0..100).collect::<Vec<_>>();
|
||||
|
||||
let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
|
||||
|
||||
let _ = c.bench_function("MergeTreeMemtable", |b| {
|
||||
let kvs =
|
||||
memtable_util::build_key_values(&metadata, "hello".to_string(), 42, ×tamps, 1);
|
||||
b.iter(|| {
|
||||
memtable.write(&kvs).unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_merge_tree_memtable);
|
||||
criterion_main!(benches);
|
||||
@@ -219,25 +219,14 @@ pub(crate) fn extract_data_batch(batch: &DataBatch) -> (u16, Vec<(i64, u64)>) {
|
||||
|
||||
/// Builds key values with timestamps (ms) and sequences for test.
|
||||
pub(crate) fn build_key_values_with_ts_seq_values(
|
||||
schema: &RegionMetadataRef,
|
||||
metadata: &RegionMetadataRef,
|
||||
k0: String,
|
||||
k1: u32,
|
||||
timestamps: impl Iterator<Item = i64>,
|
||||
values: impl Iterator<Item = Option<f64>>,
|
||||
sequence: SequenceNumber,
|
||||
) -> KeyValues {
|
||||
let column_schema = schema
|
||||
.column_metadatas
|
||||
.iter()
|
||||
.map(|c| api::v1::ColumnSchema {
|
||||
column_name: c.column_schema.name.clone(),
|
||||
datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: c.semantic_type as i32,
|
||||
..Default::default()
|
||||
})
|
||||
.collect();
|
||||
let column_schema = region_metadata_to_row_schema(metadata);
|
||||
|
||||
let rows = timestamps
|
||||
.zip(values)
|
||||
@@ -269,7 +258,23 @@ pub(crate) fn build_key_values_with_ts_seq_values(
|
||||
rows,
|
||||
}),
|
||||
};
|
||||
KeyValues::new(schema.as_ref(), mutation).unwrap()
|
||||
KeyValues::new(metadata.as_ref(), mutation).unwrap()
|
||||
}
|
||||
|
||||
/// Converts the region metadata to column schemas for a row.
|
||||
pub fn region_metadata_to_row_schema(metadata: &RegionMetadataRef) -> Vec<api::v1::ColumnSchema> {
|
||||
metadata
|
||||
.column_metadatas
|
||||
.iter()
|
||||
.map(|c| api::v1::ColumnSchema {
|
||||
column_name: c.column_schema.name.clone(),
|
||||
datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: c.semantic_type as i32,
|
||||
..Default::default()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Encode keys.
|
||||
|
||||
Reference in New Issue
Block a user