bench: read/write for memtable (#52)

* benchmark

* fix style

Co-authored-by: 张心怡 <zhangxinyi@zhangxinyideMacBook-Pro.local>
This commit is contained in:
天空好像下雨~
2022-07-11 17:44:22 +08:00
committed by GitHub
parent 65890e09f6
commit 8852c9bc32
12 changed files with 609 additions and 3 deletions

197
Cargo.lock generated
View File

@@ -210,6 +210,12 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "atomic_float"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62af46d040ba9df09edc6528dae9d8e49f5f3e82f55b7d2ec31a733c38dbc49d"
[[package]]
name = "atty"
version = "0.2.14"
@@ -441,6 +447,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cast"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
dependencies = [
"rustc_version",
]
[[package]]
name = "cc"
version = "1.0.73"
@@ -492,6 +507,17 @@ dependencies = [
"phf_codegen",
]
[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"bitflags",
"textwrap 0.11.0",
"unicode-width",
]
[[package]]
name = "clap"
version = "3.1.17"
@@ -506,7 +532,7 @@ dependencies = [
"lazy_static",
"strsim",
"termcolor",
"textwrap",
"textwrap 0.15.0",
]
[[package]]
@@ -554,7 +580,7 @@ dependencies = [
name = "cmd"
version = "0.1.0"
dependencies = [
"clap",
"clap 3.1.17",
"common-error",
"common-telemetry",
"datanode",
@@ -766,6 +792,42 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "criterion"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10"
dependencies = [
"atty",
"cast",
"clap 2.34.0",
"criterion-plot",
"csv",
"itertools",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_cbor",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
dependencies = [
"cast",
"itertools",
]
[[package]]
name = "crossbeam"
version = "0.8.1"
@@ -1293,6 +1355,12 @@ dependencies = [
"tracing",
]
[[package]]
name = "half"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hash_hasher"
version = "2.0.3"
@@ -2096,6 +2164,12 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opendal"
version = "0.6.2"
@@ -2469,6 +2543,34 @@ dependencies = [
"array-init-cursor",
]
[[package]]
name = "plotters"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c"
[[package]]
name = "plotters-svg"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9"
dependencies = [
"plotters-backend",
]
[[package]]
name = "ppv-lite86"
version = "0.2.16"
@@ -2708,6 +2810,30 @@ dependencies = [
"bitflags",
]
[[package]]
name = "rayon"
version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "rdrand"
version = "0.4.0"
@@ -2885,6 +3011,15 @@ version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "rustversion"
version = "1.0.6"
@@ -2897,6 +3032,15 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "schannel"
version = "0.1.19"
@@ -2936,6 +3080,12 @@ dependencies = [
"libc",
]
[[package]]
name = "semver"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41d061efea015927ac527063765e73601444cdc344ba855bc7bd44578b25e1c"
[[package]]
name = "serde"
version = "1.0.137"
@@ -2945,6 +3095,16 @@ dependencies = [
"serde_derive",
]
[[package]]
name = "serde_cbor"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.137"
@@ -3126,9 +3286,12 @@ version = "0.1.0"
dependencies = [
"arc-swap",
"async-trait",
"atomic_float",
"common-error",
"common-telemetry",
"criterion",
"datatypes",
"rand 0.8.5",
"snafu",
"store-api",
"tokio",
@@ -3289,6 +3452,15 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "textwrap"
version = "0.15.0"
@@ -3386,6 +3558,16 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792"
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@@ -3837,6 +4019,17 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.0"

View File

@@ -14,3 +14,12 @@ datatypes = { path = "../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
store-api = { path = "../store-api" }
tokio = { version = "1.18", features = ["full"] }
[dev-dependencies]
criterion = "0.3"
rand = "0.8"
atomic_float="0.1"
[[bench]]
name = "bench_main"
harness = false

View File

@@ -0,0 +1,9 @@
use criterion::criterion_main;
mod memtable;
// Generates the benchmark `main`, which runs the three memtable benchmark
// groups (read, write, and mixed read/write ratio) exported by the `memtable`
// submodules.
criterion_main! {
memtable::bench_memtable_read::benches,
memtable::bench_memtable_write::benches,
memtable::bench_memtable_read_write_ratio::benches,
}

View File

@@ -0,0 +1,17 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use crate::memtable::{generate_kvs, util::bench_context::BenchContext};
/// Benchmarks a full scan of a pre-populated memtable.
///
/// The memtable is filled once, outside the measured closure, with the output
/// of `generate_kvs(10, 10000, 20)`. Every measured iteration then scans the
/// memtable with a read batch size of 100.
fn bench_memtable_read(c: &mut Criterion) {
    // the length of string in value is 20
    let prepared = generate_kvs(10, 10000, 20);
    let ctx = BenchContext::new();
    for kv in &prepared {
        ctx.write(kv);
    }

    let mut read_group = c.benchmark_group("memtable_read");
    read_group.throughput(Throughput::Elements(10 * 10000));
    read_group.bench_function("read", |bencher| bencher.iter(|| ctx.read(100)));
    read_group.finish();
}
// Defines the `benches` group that the bench entry point lists in its
// `criterion_main!` invocation.
criterion_group!(benches, bench_memtable_read);
// NOTE(review): the bench entry point already invokes `criterion_main!` for
// every group, so the `main` generated here looks unused — confirm before
// removing.
criterion_main!(benches);

View File

@@ -0,0 +1,130 @@
use std::{
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc,
},
thread,
time::Instant,
};
use atomic_float::AtomicF64;
use criterion::{
criterion_group, criterion_main, BatchSize, Bencher, BenchmarkId, Criterion, Throughput,
};
use rand::Rng;
use crate::memtable::{generate_kvs, util::bench_context::BenchContext};
// Global accumulators updated by `memtable_round` and reset by
// `bench_memtable_read_write_ratio` before each read/write mix.

/// Sum of the counts returned by `BenchContext::read`.
static READ_NUM: AtomicUsize = AtomicUsize::new(0);
/// Sum of `KeyValues::len()` over every batch written.
static WRITE_NUM: AtomicUsize = AtomicUsize::new(0);
/// Total seconds spent inside `BenchContext::read`.
static READ_SECS: AtomicF64 = AtomicF64::new(0.0);
/// Total seconds spent inside `BenchContext::write`.
static WRITE_SECS: AtomicF64 = AtomicF64::new(0.0);
/// Parameters for one round of the mixed read/write benchmark.
struct Input {
/// `true` selects a read round, `false` a write round.
ratio: bool,
/// Passed as the `kv_size` argument of `generate_kvs` in a write round.
kv_size: usize,
/// Read batch size in a read round; number of `KeyValues` generated in a
/// write round.
batch_size: usize,
}
/// Runs one read round or one write round against `ctx` and records the
/// outcome in the global counters.
///
/// * Read round (`input.ratio == true`): times a single `ctx.read` call and
///   adds the elapsed seconds and the returned count to `READ_SECS` and
///   `READ_NUM`.
/// * Write round: generates fresh batches, then times every `ctx.write` call
///   individually and adds the elapsed seconds and `kv.len()` to `WRITE_SECS`
///   and `WRITE_NUM`. Generating the data is not part of the recorded time.
fn memtable_round(ctx: &BenchContext, input: &Input) {
    if !input.ratio {
        let batches = generate_kvs(input.kv_size, input.batch_size, 20);
        for kv in &batches {
            let started = Instant::now();
            ctx.write(kv);
            let elapsed = started.elapsed();
            let secs = elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 * 1e-9;
            WRITE_SECS.fetch_add(secs, Ordering::Relaxed);
            WRITE_NUM.fetch_add(kv.len(), Ordering::Relaxed);
        }
        return;
    }

    let started = Instant::now();
    let rows = ctx.read(input.batch_size);
    let elapsed = started.elapsed();
    let secs = elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 * 1e-9;
    READ_SECS.fetch_add(secs, Ordering::Relaxed);
    READ_NUM.fetch_add(rows, Ordering::Relaxed);
}
/// Benchmarks a mixed workload in which each round is a read with probability
/// `frac / 10` and a write otherwise.
///
/// A background thread runs the same random mix against the shared memtable
/// for the whole measurement, so the measured rounds execute concurrently
/// with other reads and writes. The thread is stopped and joined before
/// returning.
///
/// # Panics
/// Panics if the background thread panicked.
fn bench_read_write_ctx_frac(b: &mut Bencher<'_>, frac: &usize) {
    let frac = *frac;
    let ctx = Arc::new(BenchContext::default());
    let thread_ctx = ctx.clone();
    let stop = Arc::new(AtomicBool::new(false));
    let thread_stop = stop.clone();

    let handle = thread::spawn(move || {
        let mut rng = rand::thread_rng();
        while !thread_stop.load(Ordering::Relaxed) {
            // Draw from the ten values 0..10 so that `f < frac` holds with
            // probability exactly frac / 10 (0 => never, 10 => always).
            let f = rng.gen_range(0..10);
            let input = Input {
                ratio: f < frac,
                kv_size: 100,
                batch_size: 1000,
            };
            memtable_round(&thread_ctx, &input);
        }
    });

    let mut rng = rand::thread_rng();
    b.iter_batched_ref(
        || {
            // Same distribution as the background thread.
            let f = rng.gen_range(0..10);
            Input {
                ratio: f < frac,
                kv_size: 100,
                batch_size: 1000,
            }
        },
        |input| {
            memtable_round(&ctx, input);
        },
        BatchSize::SmallInput,
    );

    stop.store(true, Ordering::Relaxed);
    handle.join().unwrap();
}
/// Benchmarks the memtable under eleven read/write mixes, labelled from 0% to
/// 100% reads in steps of 10%.
///
/// For every mix the global read/write counters are reset, the mix is
/// benchmarked through `bench_read_write_ctx_frac`, and a summary of the
/// counts and per-second rates accumulated in those counters is printed.
// Printing the summary to stdout is intentional, hence the lint allowance.
#[allow(clippy::print_stdout)]
fn bench_memtable_read_write_ratio(c: &mut Criterion) {
    // Elements per second; 0.0 when nothing was timed, so a mix without reads
    // (or without writes) does not report NaN.
    let per_second = |count: usize, secs: f64| {
        if secs > 0.0 {
            count as f64 / secs
        } else {
            0.0
        }
    };

    let mut group = c.benchmark_group("memtable_read_write_ratio");
    // Set the throughput before any benchmark runs; it only applies to
    // benchmarks started after this call.
    group.throughput(Throughput::Elements(100 * 1000));

    for i in 0..=10 {
        READ_NUM.store(0, Ordering::Relaxed);
        WRITE_NUM.store(0, Ordering::Relaxed);
        READ_SECS.store(0.0, Ordering::Relaxed);
        WRITE_SECS.store(0.0, Ordering::Relaxed);

        group.bench_with_input(
            BenchmarkId::from_parameter(format!(
                "read ratio: {:.2}% , write ratio: {:.2}%",
                i as f64 / 10_f64 * 100.0,
                (10 - i) as f64 / 10_f64 * 100.0,
            )),
            &i,
            bench_read_write_ctx_frac,
        );

        // These timings are accumulated by `memtable_round` itself, so they
        // differ slightly from the wall-clock time criterion reports.
        let read_num = READ_NUM.load(Ordering::Relaxed);
        let read_tps = per_second(read_num, READ_SECS.load(Ordering::Relaxed));
        let write_num = WRITE_NUM.load(Ordering::Relaxed);
        let write_tps = per_second(write_num, WRITE_SECS.load(Ordering::Relaxed));
        println!(
            "\nread numbers: {}, read thrpt: {}\nwrite numbers: {}, write thrpt {}\n",
            read_num, read_tps, write_num, write_tps
        );
    }
    group.finish();
}
// Defines the `benches` group that the bench entry point lists in its
// `criterion_main!` invocation.
criterion_group!(benches, bench_memtable_read_write_ratio);
// NOTE(review): the bench entry point already invokes `criterion_main!` for
// every group, so the `main` generated here looks unused — confirm before
// removing.
criterion_main!(benches);

View File

@@ -0,0 +1,19 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use crate::memtable::generate_kvs;
use crate::memtable::util::bench_context::BenchContext;
/// Benchmarks writing a fixed set of pre-generated batches into a memtable.
///
/// The batches come from `generate_kvs(10, 1000, 20)` and are created once,
/// outside the measurement. One memtable is created per `bench_function`
/// call and reused by every measured iteration, each of which writes all
/// batches again.
pub fn bench_memtable_write(c: &mut Criterion) {
    // the length of string in value is 20
    let prepared = generate_kvs(10, 1000, 20);

    let mut write_group = c.benchmark_group("memtable_write");
    write_group.throughput(Throughput::Elements(10 * 1000));
    write_group.bench_function("write", |bencher| {
        let ctx = BenchContext::new();
        bencher.iter(|| {
            for kv in &prepared {
                ctx.write(kv);
            }
        })
    });
    write_group.finish();
}
// Defines the `benches` group that the bench entry point lists in its
// `criterion_main!` invocation.
criterion_group!(benches, bench_memtable_write);
// NOTE(review): the bench entry point already invokes `criterion_main!` for
// every group, so the `main` generated here looks unused — confirm before
// removing.
criterion_main!(benches);

View File

@@ -0,0 +1,106 @@
pub mod bench_memtable_read;
pub mod bench_memtable_read_write_ratio;
pub mod bench_memtable_write;
pub mod util;
use std::sync::{
atomic::{AtomicU64, Ordering},
Arc,
};
use datatypes::{
prelude::ScalarVectorBuilder,
vectors::{Int64VectorBuilder, StringVectorBuilder, UInt64VectorBuilder},
};
use rand::{distributions::Alphanumeric, prelude::ThreadRng, Rng};
use storage::memtable::KeyValues;
use store_api::storage::{SequenceNumber, ValueType};
/// Process-wide counter backing `get_sequence`; starts at 0.
static NEXT_SEQUENCE: AtomicU64 = AtomicU64::new(0);
/// Returns the counter's current value and then advances it by one.
fn get_sequence() -> SequenceNumber {
NEXT_SEQUENCE.fetch_add(1, Ordering::Relaxed)
}
/// Generates one random row as `((key0, key1), (value1, value2))`.
///
/// `key0` is drawn from `0..10000`; `key1` and `value1` are arbitrary `u64`s
/// (`value1` is always `Some`); `value2` is an alphanumeric string of
/// `value_size` characters. All randomness comes from the supplied `rng`.
fn random_kv(rng: &mut ThreadRng, value_size: usize) -> ((i64, u64), (Option<u64>, String)) {
    let key0 = rng.gen_range(0..10000);
    let key1 = rng.gen::<u64>();
    let value1 = Some(rng.gen::<u64>());
    // Use the caller's generator rather than fetching a second
    // `thread_rng()` handle just for the string.
    let value2 = rng
        .sample_iter(&Alphanumeric)
        .take(value_size)
        .map(char::from)
        .collect();
    ((key0, key1), (value1, value2))
}
/// One generated row key: the two key columns, as produced by `random_kv`.
type KeyTuple = (i64, u64);
/// One generated row value: the two value columns, as produced by `random_kv`.
type ValueTuple = (Option<u64>, String);
/// Generates `len` random rows and returns their keys and values as two
/// parallel vectors (element `i` of each belongs to the same row).
///
/// `value_size` is the length of the string in every generated value.
fn random_kvs(len: usize, value_size: usize) -> (Vec<KeyTuple>, Vec<ValueTuple>) {
    // Obtaining the generator handle is loop-invariant, so do it once.
    let mut rng = rand::thread_rng();
    let mut keys = Vec::with_capacity(len);
    let mut values = Vec::with_capacity(len);
    for _ in 0..len {
        let (key, value) = random_kv(&mut rng, value_size);
        keys.push(key);
        values.push(value);
    }
    (keys, values)
}
/// Packs row-oriented `keys` and `values` into a column-oriented `KeyValues`.
///
/// Each tuple position becomes one column vector: two key columns
/// (`i64`, `u64`) and two value columns (optional `u64`, string).
/// `sequence`, `value_type` and `start_index_in_batch` are stored unchanged.
fn kvs_with_index(
    sequence: SequenceNumber,
    value_type: ValueType,
    start_index_in_batch: usize,
    keys: &[(i64, u64)],
    values: &[(Option<u64>, String)],
) -> KeyValues {
    // Key columns: every key component is present, hence `Some(..)`.
    let mut key0_builder = Int64VectorBuilder::with_capacity(keys.len());
    let mut key1_builder = UInt64VectorBuilder::with_capacity(keys.len());
    for (k0, k1) in keys {
        key0_builder.push(Some(*k0));
        key1_builder.push(Some(*k1));
    }

    // Value columns: the numeric value keeps its own optionality.
    let mut number_builder = UInt64VectorBuilder::with_capacity(values.len());
    let mut text_builder = StringVectorBuilder::with_capacity(values.len());
    for (number, text) in values {
        number_builder.push(*number);
        text_builder.push(Some(text.as_str()));
    }

    KeyValues {
        sequence,
        value_type,
        start_index_in_batch,
        keys: vec![
            Arc::new(key0_builder.finish()) as _,
            Arc::new(key1_builder.finish()) as _,
        ],
        values: vec![
            Arc::new(number_builder.finish()) as _,
            Arc::new(text_builder.finish()) as _,
        ],
    }
}
/// Builds one `KeyValues` batch of `kv_size` random rows.
///
/// The batch is a `Put` carrying a freshly allocated sequence number and the
/// given `start_index_in_batch`; `value_size` is the length of each string
/// value.
fn generate_kv(kv_size: usize, start_index_in_batch: usize, value_size: usize) -> KeyValues {
    let (row_keys, row_values) = random_kvs(kv_size, value_size);
    let sequence = get_sequence();
    kvs_with_index(
        sequence,
        ValueType::Put,
        start_index_in_batch,
        &row_keys,
        &row_values,
    )
}
/// Builds `size` random batches of `kv_size` rows each.
///
/// Batch `i` is created with `start_index_in_batch == i`; `value_size` is the
/// length of each string value.
fn generate_kvs(kv_size: usize, size: usize, value_size: usize) -> Vec<KeyValues> {
    let mut batches = Vec::with_capacity(size);
    for index in 0..size {
        batches.push(generate_kv(kv_size, index, value_size));
    }
    batches
}

View File

@@ -0,0 +1,37 @@
use storage::memtable::{IterContext, KeyValues, MemtableRef};
use store_api::storage::SequenceNumber;
use crate::memtable::util::new_memtable;
/// Benchmark fixture wrapping the memtable that reads and writes are issued
/// against.
pub struct BenchContext {
/// Memtable under test.
memtable: MemtableRef,
}
impl Default for BenchContext {
fn default() -> Self {
BenchContext::new()
}
}
impl BenchContext {
    /// Creates a context backed by a fresh memtable from `new_memtable`.
    pub fn new() -> BenchContext {
        BenchContext {
            memtable: new_memtable(),
        }
    }

    /// Writes `kvs` into the memtable.
    ///
    /// # Panics
    /// Panics if the memtable returns an error.
    pub fn write(&self, kvs: &KeyValues) {
        self.memtable.write(kvs).unwrap();
    }

    /// Scans the memtable with the given `batch_size`, with every sequence
    /// number visible, and returns `batch_size` multiplied by the number of
    /// batches the iterator yielded.
    ///
    /// NOTE(review): a full `batch_size` is counted for every batch; if a
    /// batch can hold fewer rows, the result over-counts — confirm against
    /// the iterator's contract.
    ///
    /// # Panics
    /// Panics if the iterator cannot be created or if it reports an error
    /// while scanning. An error is not treated as end-of-data, because that
    /// would silently under-report the rows read and skew the benchmark.
    pub fn read(&self, batch_size: usize) -> usize {
        let iter_ctx = IterContext {
            batch_size,
            visible_sequence: SequenceNumber::MAX,
        };
        let mut iter = self.memtable.iter(iter_ctx).unwrap();

        let mut read_count = 0;
        while iter
            .next()
            .expect("memtable iterator failed during benchmark read")
            .is_some()
        {
            read_count += batch_size;
        }
        read_count
    }
}

View File

@@ -0,0 +1,26 @@
pub mod bench_context;
pub mod regiondesc_util;
pub mod schema_util;
use datatypes::type_id::LogicalTypeId;
use storage::{
memtable::{DefaultMemtableBuilder, MemtableBuilder, MemtableRef, MemtableSchema},
metadata::RegionMetadata,
};
use crate::memtable::util::regiondesc_util::RegionDescBuilder;
/// Name of the timestamp column that `RegionDescBuilder::new` puts in the row key.
pub const TIMESTAMP_NAME: &str = "timestamp";
/// Builds the memtable schema used by the benchmarks.
///
/// The underlying region, named "bench", has two nullable value columns:
/// `v1` (`UInt64`) and `v2` (`String`).
///
/// # Panics
/// Panics if the region descriptor cannot be converted into `RegionMetadata`.
pub fn schema_for_test() -> MemtableSchema {
    let builder = RegionDescBuilder::new("bench")
        .push_value_column(("v1", LogicalTypeId::UInt64, true))
        .push_value_column(("v2", LogicalTypeId::String, true));
    let metadata: RegionMetadata = builder.build().try_into().unwrap();
    let row_key_columns = metadata.columns_row_key;
    MemtableSchema::new(row_key_columns)
}
/// Creates an empty memtable with the benchmark schema, using
/// `DefaultMemtableBuilder`.
pub fn new_memtable() -> MemtableRef {
    let builder = DefaultMemtableBuilder {};
    let schema = schema_for_test();
    builder.build(schema)
}

View File

@@ -0,0 +1,58 @@
use datatypes::prelude::ConcreteDataType;
use store_api::storage::{
ColumnDescriptor, ColumnDescriptorBuilder, ColumnFamilyDescriptorBuilder, ColumnId,
RegionDescriptor, RowKeyDescriptorBuilder,
};
use super::{schema_util::ColumnDef, TIMESTAMP_NAME};
/// Builder for the `RegionDescriptor` used by the benchmarks.
pub struct RegionDescBuilder {
/// Region name copied into the descriptor.
name: String,
/// Most recently assigned column id; `alloc_column_id` increments it and
/// returns the new value.
last_column_id: ColumnId,
/// Row-key builder, created with the timestamp column.
key_builder: RowKeyDescriptorBuilder,
/// Builder for the default column family that value columns are pushed into.
default_cf_builder: ColumnFamilyDescriptorBuilder,
}
impl RegionDescBuilder {
    /// Starts a descriptor for a region called `name`.
    ///
    /// The row key is created with a non-nullable `int64` timestamp column
    /// that has id 2, so value columns receive ids starting at 3.
    pub fn new<T: Into<String>>(name: T) -> Self {
        let timestamp_column =
            ColumnDescriptorBuilder::new(2, TIMESTAMP_NAME, ConcreteDataType::int64_datatype())
                .is_nullable(false)
                .build();

        Self {
            name: name.into(),
            last_column_id: 2,
            key_builder: RowKeyDescriptorBuilder::new(timestamp_column),
            default_cf_builder: ColumnFamilyDescriptorBuilder::new(),
        }
    }

    /// Adds a value column described by `column_def` (name, logical type,
    /// nullable) to the default column family and returns the builder.
    pub fn push_value_column(mut self, column_def: ColumnDef) -> Self {
        let descriptor = self.new_column(column_def);
        let family = self.default_cf_builder.push_column(descriptor);
        self.default_cf_builder = family;
        self
    }

    /// Consumes the builder and produces the descriptor, with id 0 and no
    /// extra column families.
    pub fn build(self) -> RegionDescriptor {
        let Self {
            name,
            key_builder,
            default_cf_builder,
            ..
        } = self;

        RegionDescriptor {
            id: 0,
            name,
            row_key: key_builder.build(),
            default_cf: default_cf_builder.build(),
            extra_cfs: Vec::new(),
        }
    }

    /// Advances the column id counter and returns the new id.
    fn alloc_column_id(&mut self) -> ColumnId {
        let next_id = self.last_column_id + 1;
        self.last_column_id = next_id;
        next_id
    }

    /// Turns a `ColumnDef` into a `ColumnDescriptor` with a newly allocated id.
    fn new_column(&mut self, column_def: ColumnDef) -> ColumnDescriptor {
        let (column_name, type_id, nullable) = column_def;
        let datatype = type_id.data_type();
        let column_id = self.alloc_column_id();
        ColumnDescriptorBuilder::new(column_id, column_name, datatype)
            .is_nullable(nullable)
            .build()
    }
}

View File

@@ -0,0 +1,3 @@
use datatypes::type_id::LogicalTypeId;
/// Shorthand column definition: `(name, logical type, nullable)`.
pub type ColumnDef<'a> = (&'a str, LogicalTypeId, bool);

View File

@@ -149,7 +149,6 @@ async fn test_simple_put_scan() {
let output = tester.full_scan().await;
assert_eq!(data, output);
}
#[tokio::test]
async fn test_sequence_increase() {
let tester = Tester::default();