mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-14 20:10:37 +00:00
perf: improve bloom filter reader's byte reading logic (#6658)
* perf: improve bloom filter reader's byte reading logic Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * revert toml change Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * clarify comment Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * benchmark Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * update lock file Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * pub util fn Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * note endian Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -6133,6 +6133,7 @@ dependencies = [
|
||||
"prost 0.13.5",
|
||||
"puffin",
|
||||
"rand 0.9.0",
|
||||
"rand_chacha 0.9.0",
|
||||
"regex",
|
||||
"regex-automata 0.4.8",
|
||||
"roaring",
|
||||
|
||||
@@ -44,6 +44,7 @@ uuid.workspace = true
|
||||
common-test-util.workspace = true
|
||||
criterion = "0.4"
|
||||
rand.workspace = true
|
||||
rand_chacha = "0.9"
|
||||
tempfile.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-util.workspace = true
|
||||
@@ -51,3 +52,7 @@ tokio-util.workspace = true
|
||||
[[bench]]
|
||||
name = "tokenizer_bench"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "bytes_to_u64_vec"
|
||||
harness = false
|
||||
|
||||
99
src/index/benches/bytes_to_u64_vec.rs
Normal file
99
src/index/benches/bytes_to_u64_vec.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::hint::black_box;
|
||||
|
||||
use bytes::Bytes;
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use index::bloom_filter::reader::bytes_to_u64_vec;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
|
||||
/// Generate test data that is guaranteed to be aligned to 8-byte boundary
|
||||
fn generate_aligned_data(size: usize) -> Bytes {
|
||||
let mut rng = ChaCha8Rng::seed_from_u64(42);
|
||||
let u64_count = size / 8; // Number of u64 values
|
||||
|
||||
// Generate random u64 values directly - this guarantees alignment
|
||||
let mut u64_data: Vec<u64> = Vec::with_capacity(u64_count);
|
||||
for _ in 0..u64_count {
|
||||
u64_data.push(rng.random::<u64>());
|
||||
}
|
||||
|
||||
// Transmute Vec<u64> to Vec<u8> while preserving alignment
|
||||
let byte_vec = unsafe {
|
||||
let ptr = u64_data.as_mut_ptr() as *mut u8;
|
||||
let len = u64_data.len() * std::mem::size_of::<u64>();
|
||||
let cap = u64_data.capacity() * std::mem::size_of::<u64>();
|
||||
std::mem::forget(u64_data); // Prevent dropping the original Vec
|
||||
Vec::from_raw_parts(ptr, len, cap)
|
||||
};
|
||||
|
||||
Bytes::from(byte_vec)
|
||||
}
|
||||
|
||||
/// Generate test data that is guaranteed to be unaligned
|
||||
fn generate_unaligned_data(size: usize) -> Bytes {
|
||||
let mut rng = ChaCha8Rng::seed_from_u64(42);
|
||||
let u64_count = size / 8; // Number of u64 values
|
||||
|
||||
// Generate random u64 values - start with aligned data
|
||||
let mut u64_data: Vec<u64> = Vec::with_capacity(u64_count);
|
||||
for _ in 0..u64_count {
|
||||
u64_data.push(rng.random::<u64>());
|
||||
}
|
||||
|
||||
// Transmute Vec<u64> to Vec<u8>
|
||||
let byte_vec = unsafe {
|
||||
let ptr = u64_data.as_mut_ptr() as *mut u8;
|
||||
let len = u64_data.len() * std::mem::size_of::<u64>();
|
||||
let cap = u64_data.capacity() * std::mem::size_of::<u64>();
|
||||
std::mem::forget(u64_data); // Prevent dropping the original Vec
|
||||
Vec::from_raw_parts(ptr, len, cap)
|
||||
};
|
||||
|
||||
let unaligned_bytes = Bytes::from(byte_vec);
|
||||
unaligned_bytes.slice(1..)
|
||||
}
|
||||
|
||||
fn benchmark_convert(c: &mut Criterion) {
|
||||
let sizes = vec![1024, 16384, 262144, 1048576]; // 1KB to 1MB
|
||||
|
||||
let mut group = c.benchmark_group("bytes_to_u64_vec");
|
||||
|
||||
for size in sizes {
|
||||
let data = generate_aligned_data(size);
|
||||
group.throughput(Throughput::Bytes(data.len() as u64));
|
||||
group.bench_with_input(BenchmarkId::new("aligned", size), &data, |b, data| {
|
||||
b.iter(|| {
|
||||
let result = bytes_to_u64_vec(black_box(data));
|
||||
black_box(result);
|
||||
});
|
||||
});
|
||||
|
||||
let data = generate_unaligned_data(size);
|
||||
group.throughput(Throughput::Bytes(data.len() as u64));
|
||||
group.bench_with_input(BenchmarkId::new("unaligned", size), &data, |b, data| {
|
||||
b.iter(|| {
|
||||
let result = bytes_to_u64_vec(black_box(data));
|
||||
black_box(result);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, benchmark_convert);
|
||||
criterion_main!(benches);
|
||||
@@ -12,9 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::ops::{Range, Rem};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytemuck::try_cast_slice;
|
||||
use bytes::Bytes;
|
||||
use common_base::range_read::RangeReader;
|
||||
use fastbloom::BloomFilter;
|
||||
@@ -33,6 +34,47 @@ const BLOOM_META_LEN_SIZE: u64 = 4;
|
||||
/// Default prefetch size of bloom filter meta.
|
||||
pub const DEFAULT_PREFETCH_SIZE: u64 = 8192; // 8KiB
|
||||
|
||||
/// Safely converts bytes to Vec<u64> using bytemuck for optimal performance.
|
||||
/// Faster than chunking and converting each piece individually.
|
||||
///
|
||||
/// The input bytes are a sequence of little-endian u64s.
|
||||
pub fn bytes_to_u64_vec(bytes: &Bytes) -> Vec<u64> {
|
||||
// drop tailing things, this keeps the same behavior with `chunks_exact`.
|
||||
let aligned_length = bytes.len() - bytes.len().rem(std::mem::size_of::<u64>());
|
||||
let byte_slice = &bytes[..aligned_length];
|
||||
|
||||
// Try fast path first: direct cast if aligned
|
||||
let u64_vec = if let Ok(u64_slice) = try_cast_slice::<u8, u64>(byte_slice) {
|
||||
u64_slice.to_vec()
|
||||
} else {
|
||||
// Slow path: create aligned Vec<u64> and copy data
|
||||
let u64_count = byte_slice.len() / std::mem::size_of::<u64>();
|
||||
let mut u64_vec = Vec::<u64>::with_capacity(u64_count);
|
||||
|
||||
// SAFETY: We're creating a properly sized slice from uninitialized but allocated memory
|
||||
// to copy bytes into. The slice has exactly the right size for the byte data.
|
||||
let dest_slice = unsafe {
|
||||
std::slice::from_raw_parts_mut(u64_vec.as_mut_ptr() as *mut u8, byte_slice.len())
|
||||
};
|
||||
dest_slice.copy_from_slice(byte_slice);
|
||||
|
||||
// SAFETY: We've just initialized exactly u64_count elements worth of bytes
|
||||
unsafe { u64_vec.set_len(u64_count) };
|
||||
u64_vec
|
||||
};
|
||||
|
||||
// Convert from platform endianness to little endian if needed
|
||||
// Just in case.
|
||||
#[cfg(target_endian = "little")]
|
||||
{
|
||||
u64_vec
|
||||
}
|
||||
#[cfg(target_endian = "big")]
|
||||
{
|
||||
u64_vec.into_iter().map(|x| x.swap_bytes()).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// `BloomFilterReader` reads the bloom filter from the file.
|
||||
#[async_trait]
|
||||
pub trait BloomFilterReader: Sync {
|
||||
@@ -56,10 +98,7 @@ pub trait BloomFilterReader: Sync {
|
||||
/// Reads a bloom filter with the given location.
|
||||
async fn bloom_filter(&self, loc: &BloomFilterLoc) -> Result<BloomFilter> {
|
||||
let bytes = self.range_read(loc.offset, loc.size as _).await?;
|
||||
let vec = bytes
|
||||
.chunks_exact(std::mem::size_of::<u64>())
|
||||
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
|
||||
.collect();
|
||||
let vec = bytes_to_u64_vec(&bytes);
|
||||
let bm = BloomFilter::from_vec(vec)
|
||||
.seed(&SEED)
|
||||
.expected_items(loc.element_count as _);
|
||||
@@ -75,10 +114,7 @@ pub trait BloomFilterReader: Sync {
|
||||
|
||||
let mut result = Vec::with_capacity(bss.len());
|
||||
for (bs, loc) in bss.into_iter().zip(locs.iter()) {
|
||||
let vec = bs
|
||||
.chunks_exact(std::mem::size_of::<u64>())
|
||||
.map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap()))
|
||||
.collect();
|
||||
let vec = bytes_to_u64_vec(&bs);
|
||||
let bm = BloomFilter::from_vec(vec)
|
||||
.seed(&SEED)
|
||||
.expected_items(loc.element_count as _);
|
||||
|
||||
Reference in New Issue
Block a user