feat(flow): mirror insert req to flow node (#3858)

* feat: mirror insert req to flow node

* refactor: group_requests_by_peer

* chore: rename `nodes` to `flows` to be more apt

* docs: add TODO

* refactor: split flow&data node grouping to two func

* refactor: mirror_flow_node_request

* chore: add some TODOs

* refactor: use Option in value

* feat: skip non-src table quickly

* docs: add TODO for  `Peer.address`

* fix: dedup
This commit is contained in:
discord9
2024-05-06 19:33:14 +08:00
committed by GitHub
parent f3b68253c2
commit 573c19be32
8 changed files with 135 additions and 15 deletions

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use api::region::RegionResponse;
use api::v1::alter_expr::Kind;
use api::v1::region::{InsertRequests as RegionInsertRequests, RegionRequestHeader};
use api::v1::{
@@ -25,6 +26,7 @@ use catalog::CatalogManagerRef;
use client::{OutputData, OutputMeta};
use common_catalog::consts::default_engine;
use common_grpc_expr::util::{extract_new_columns, ColumnExpr};
use common_meta::key::flow::TableFlowManagerRef;
use common_meta::node_manager::{AffectedRows, NodeManagerRef};
use common_meta::peer::Peer;
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
@@ -32,15 +34,17 @@ use common_query::Output;
use common_telemetry::tracing_context::TracingContext;
use common_telemetry::{error, info};
use datatypes::schema::Schema;
use futures_util::future;
use futures_util::{future, TryStreamExt};
use meter_macros::write_meter;
use partition::manager::PartitionRuleManagerRef;
use session::context::QueryContextRef;
use snafu::prelude::*;
use snafu::ResultExt;
use sql::statements::insert::Insert;
use store_api::metric_engine_consts::{
LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY,
};
use store_api::storage::{RegionId, TableId};
use table::requests::InsertRequest as TableInsertRequest;
use table::table_reference::TableReference;
use table::TableRef;
@@ -58,6 +62,7 @@ pub struct Inserter {
catalog_manager: CatalogManagerRef,
partition_manager: PartitionRuleManagerRef,
node_manager: NodeManagerRef,
table_flow_manager: TableFlowManagerRef,
}
pub type InserterRef = Arc<Inserter>;
@@ -67,11 +72,13 @@ impl Inserter {
catalog_manager: CatalogManagerRef,
partition_manager: PartitionRuleManagerRef,
node_manager: NodeManagerRef,
table_flow_manager: TableFlowManagerRef,
) -> Self {
Self {
catalog_manager,
partition_manager,
node_manager,
table_flow_manager,
}
}
@@ -199,13 +206,34 @@ impl Inserter {
..Default::default()
});
// spawn all tasks that do job for mirror insert requests for flownode
let flow_tasks = self
.mirror_flow_node_requests(&requests)
.await?
.into_iter()
.map(|(peer, inserts)| {
let node_manager = self.node_manager.clone();
common_runtime::spawn_write(async move {
node_manager
.flownode(&peer)
.await
.handle_inserts(inserts)
.await
.map(|flow_response| RegionResponse {
affected_rows: flow_response.affected_rows as AffectedRows,
extension: flow_response.extension,
})
.context(RequestInsertsSnafu)
})
});
let tasks = self
.group_requests_by_peer(requests)
.await?
.into_iter()
.map(|(peer, inserts)| {
let request = request_factory.build_insert(inserts);
let node_manager = self.node_manager.clone();
let request = request_factory.build_insert(inserts);
common_runtime::spawn_write(async move {
node_manager
.datanode(&peer)
@@ -214,7 +242,8 @@ impl Inserter {
.await
.context(RequestInsertsSnafu)
})
});
})
.chain(flow_tasks);
let results = future::try_join_all(tasks).await.context(JoinTaskSnafu)?;
let affected_rows = results
@@ -228,19 +257,93 @@ impl Inserter {
))
}
/// Mirror requests for source table to flownode
async fn mirror_flow_node_requests(
&self,
requests: &RegionInsertRequests,
) -> Result<HashMap<Peer, RegionInsertRequests>> {
// store partial source table requests used by flow node(only store what's used)
let mut src_table_reqs: HashMap<TableId, Option<(Vec<Peer>, RegionInsertRequests)>> =
HashMap::new();
for req in &requests.requests {
match src_table_reqs.get_mut(&RegionId::from_u64(req.region_id).table_id()) {
Some(Some((_peers, reqs))) => reqs.requests.push(req.clone()),
// already know this is not source table
Some(None) => continue,
_ => {
let table_id = RegionId::from_u64(req.region_id).table_id();
let peers = self
.table_flow_manager
.flows(table_id)
// TODO(discord9): determine where to store the flow node address in distributed mode
.map_ok(|key| Peer::new(key.flownode_id(), ""))
.try_collect::<Vec<_>>()
.await
.map(|mut v| {
v.dedup();
v
})
.context(RequestInsertsSnafu)?;
if !peers.is_empty() {
let mut reqs = RegionInsertRequests::default();
reqs.requests.push(req.clone());
src_table_reqs.insert(table_id, Some((peers, reqs)));
} else {
// insert a empty entry to avoid repeat query
src_table_reqs.insert(table_id, None);
}
}
}
}
let mut inserts: HashMap<Peer, RegionInsertRequests> = HashMap::new();
for (_table_id, (peers, reqs)) in src_table_reqs
.into_iter()
.filter_map(|(k, v)| v.map(|v| (k, v)))
{
for flownode in peers {
inserts
.entry(flownode.clone())
.or_default()
.requests
.extend(reqs.requests.clone());
}
}
Ok(inserts)
}
async fn group_requests_by_peer(
&self,
requests: RegionInsertRequests,
) -> Result<HashMap<Peer, RegionInsertRequests>> {
let mut inserts: HashMap<Peer, RegionInsertRequests> = HashMap::new();
// group by region ids first to reduce repeatedly call `find_region_leader`
// TODO(discord9): determine if a addition clone is worth it
let mut requests_per_region: HashMap<RegionId, RegionInsertRequests> = HashMap::new();
for req in requests.requests {
let region_id = RegionId::from_u64(req.region_id);
requests_per_region
.entry(region_id)
.or_default()
.requests
.push(req);
}
let mut inserts: HashMap<Peer, RegionInsertRequests> = HashMap::new();
for (region_id, reqs) in requests_per_region {
let peer = self
.partition_manager
.find_region_leader(req.region_id.into())
.find_region_leader(region_id)
.await
.context(FindRegionLeaderSnafu)?;
inserts.entry(peer).or_default().requests.push(req);
inserts
.entry(peer)
.or_default()
.requests
.extend(reqs.requests);
}
Ok(inserts)