mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-01 21:00:38 +00:00
feat(wal): support bulk wal entries (#6178)
* feat/bulk-wal: ### Refactor: Simplify Data Handling in LogStore Implementations - **`kafka/log_store.rs`, `raft_engine/log_store.rs`, `wal.rs`, `raw_entry_reader.rs`, `logstore.rs`:** - Refactored `entry` and `build_entry` functions to accept `Vec<u8>` directly instead of `&mut Vec<u8>`. - Removed usage of `std::mem::take` for data handling, simplifying the code and improving readability. - Updated test cases to align with the new function signatures. * feat/bulk-wal: ### Add Support for Bulk WAL Entries and Flight Data Encoding - **Add `raw_data` field to `BulkPart` and related structs**: Updated `BulkPart` and related structures in `src/mito2/src/memtable/bulk/part.rs`, `src/mito2/src/memtable/simple_bulk_memtable.rs`, `src/mito2/src/memtable/time_partition.rs`, `src/mito2/src/region_write_ctx.rs`, `src/mito2/src/worker/handle_bulk_insert.rs`, and `src/store-api/src/region_request.rs` to include a new `raw_data` field for handling Arrow IPC data. - **Implement Flight Data Encoding**: Added a new module `flight` in `src/common/test-util/src/flight.rs` to encode record batches to Flight data format. - **Update `greptime-proto` dependency**: Changed the revision of the `greptime-proto` dependency in `Cargo.lock` and `Cargo.toml`. - **Enhance WAL Writer and Tests**: Modified `src/mito2/src/wal.rs` and related test files to support bulk WAL entries and added tests for encoding and handling bulk data. * feat/bulk-wal: - **Update `greptime-proto` Dependency**: Updated the `greptime-proto` dependency to a new revision in `Cargo.lock` and `Cargo.toml`. - **Add `common-grpc` Dependency**: Added `common-grpc` as a dependency in `Cargo.lock` and `src/mito2/Cargo.toml`. - **Refactor `BulkPart` Structure**: Removed `num_rows` field and added `num_rows()` method in `src/mito2/src/memtable/bulk/part.rs`. Updated related usages in `src/mito2/src/memtable/simple_bulk_memtable.rs`, `src/mito2/src/memtable/time_partition.rs`, `src/mito2/src/memtable/time_series.rs`, `src/mito2/src/region_write_ctx.rs`, and `src/mito2/src/worker/handle_bulk_insert.rs`. - **Implement `TryFrom` and `From` for `BulkWalEntry`**: Added implementations for converting between `BulkPart` and `BulkWalEntry` in `src/mito2/src/memtable/bulk/part.rs`. - **Handle Bulk Entries in Region Opener**: Added logic to process bulk entries in `src/mito2/src/region/opener.rs`. - **Fix `BulkInsertRequest` Handling**: Corrected `region_id` handling in `src/operator/src/bulk_insert.rs` and `src/store-api/src/region_request.rs`. - **Add Error Variant for `ConvertBulkWalEntry`**: Added a new error variant in `src/mito2/src/error.rs` for handling bulk WAL entry conversion errors. * fix: ci * feat/bulk-wal: Add bulk write operation in `opener.rs` - Enhanced the region write context by adding a call to `write_bulk()` after `write_memtable()` in `opener.rs`. - This change aims to improve the efficiency of writing operations by enabling bulk writes. * feat/bulk-wal: Enhance error handling and metrics in `bulk_insert.rs` - Updated `Inserter` to improve error handling by capturing the result of `datanode.handle(request)` and incrementing the `DIST_INGEST_ROW_COUNT` metric with the number of affected rows. * feat/bulk-wal: ### Remove Encode Error Handling for WAL Entries - **`error.rs`**: Removed the `EncodeWal` error variant and its associated handling. - **`wal.rs`**: Eliminated the `entry_encode_buf` buffer and its usage for encoding WAL entries. Replaced with direct encoding to a vector using `encode_to_vec()`.
This commit is contained in:
@@ -89,7 +89,7 @@ pub trait LogStore: Send + Sync + 'static + std::fmt::Debug {
|
||||
/// Makes an entry instance of the associated Entry type
|
||||
fn entry(
|
||||
&self,
|
||||
data: &mut Vec<u8>,
|
||||
data: Vec<u8>,
|
||||
entry_id: EntryId,
|
||||
region_id: RegionId,
|
||||
provider: &Provider,
|
||||
|
||||
@@ -22,13 +22,13 @@ use api::v1::column_def::{
|
||||
};
|
||||
use api::v1::region::bulk_insert_request::Body;
|
||||
use api::v1::region::{
|
||||
alter_request, compact_request, region_request, AlterRequest, AlterRequests, ArrowIpc,
|
||||
BulkInsertRequest, CloseRequest, CompactRequest, CreateRequest, CreateRequests, DeleteRequests,
|
||||
DropRequest, DropRequests, FlushRequest, InsertRequests, OpenRequest, TruncateRequest,
|
||||
alter_request, compact_request, region_request, AlterRequest, AlterRequests, BulkInsertRequest,
|
||||
CloseRequest, CompactRequest, CreateRequest, CreateRequests, DeleteRequests, DropRequest,
|
||||
DropRequests, FlushRequest, InsertRequests, OpenRequest, TruncateRequest,
|
||||
};
|
||||
use api::v1::{
|
||||
self, set_index, Analyzer, FulltextBackend as PbFulltextBackend, Option as PbOption, Rows,
|
||||
SemanticType, SkippingIndexType as PbSkippingIndexType, WriteHint,
|
||||
self, set_index, Analyzer, ArrowIpc, FulltextBackend as PbFulltextBackend, Option as PbOption,
|
||||
Rows, SemanticType, SkippingIndexType as PbSkippingIndexType, WriteHint,
|
||||
};
|
||||
pub use common_base::AffectedRows;
|
||||
use common_grpc::flight::FlightDecoder;
|
||||
@@ -325,28 +325,27 @@ fn make_region_truncate(truncate: TruncateRequest) -> Result<Vec<(RegionId, Regi
|
||||
|
||||
/// Convert [BulkInsertRequest] to [RegionRequest] and group by [RegionId].
|
||||
fn make_region_bulk_inserts(request: BulkInsertRequest) -> Result<Vec<(RegionId, RegionRequest)>> {
|
||||
let region_id = request.region_id.into();
|
||||
let Some(Body::ArrowIpc(request)) = request.body else {
|
||||
return Ok(vec![]);
|
||||
};
|
||||
|
||||
let ArrowIpc {
|
||||
region_id,
|
||||
schema,
|
||||
payload,
|
||||
data_header,
|
||||
} = request;
|
||||
let decoder_timer = metrics::CONVERT_REGION_BULK_REQUEST
|
||||
.with_label_values(&["decode"])
|
||||
.start_timer();
|
||||
let mut decoder = FlightDecoder::try_from_schema_bytes(&schema).context(FlightCodecSnafu)?;
|
||||
let mut decoder =
|
||||
FlightDecoder::try_from_schema_bytes(&request.schema).context(FlightCodecSnafu)?;
|
||||
let payload = decoder
|
||||
.try_decode_record_batch(&data_header, &payload)
|
||||
.try_decode_record_batch(&request.data_header, &request.payload)
|
||||
.context(FlightCodecSnafu)?;
|
||||
decoder_timer.observe_duration();
|
||||
let region_id: RegionId = region_id.into();
|
||||
Ok(vec![(
|
||||
region_id,
|
||||
RegionRequest::BulkInserts(RegionBulkInsertsRequest { region_id, payload }),
|
||||
RegionRequest::BulkInserts(RegionBulkInsertsRequest {
|
||||
region_id,
|
||||
payload,
|
||||
raw_data: request,
|
||||
}),
|
||||
)])
|
||||
}
|
||||
|
||||
@@ -1137,6 +1136,7 @@ pub struct RegionSequencesRequest {
|
||||
pub struct RegionBulkInsertsRequest {
|
||||
pub region_id: RegionId,
|
||||
pub payload: DfRecordBatch,
|
||||
pub raw_data: ArrowIpc,
|
||||
}
|
||||
|
||||
impl RegionBulkInsertsRequest {
|
||||
|
||||
Reference in New Issue
Block a user