mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-19 22:40:40 +00:00
feat: log ingestion support (#4014)
* chore: add log http ingester scaffold * chore: add some example code * chore: add log inserter * chore: add log handler file * chore: add pipeline lib * chore: import log handler * chore: add pipelime http handler * chore: add pipeline private table * chore: add pipeline API * chore: improve error handling * chore: merge main * chore: add multi content type support for log handler * refactor: remove servers dep on pipeline * refactor: move define_into_tonic_status to common-error * refactor: bring in pipeline 3eb890c551b8d7f60c4491fcfec18966e2b210a4 * chore: fix typo * refactor: bring in pipeline a95c9767d7056ab01dd8ca5fa1214456c6ffc72c * chore: fix typo and license header * refactor: move http event handler to a separate file * chore: add test for pipeline * chore: fmt * refactor: bring in pipeline 7d2402701877901871dd1294a65ac937605a6a93 * refactor: move `pipeline_operator` to `pipeline` crate * chore: minor update * refactor: bring in pipeline 1711f4d46687bada72426d88cda417899e0ae3a4 * chore: add log * chore: add log * chore: remove open hook * chore: minor update * chore: fix fmt * chore: minor update * chore: rename desc for pipeline table * refactor: remove updated_at in pipelines * chore: add more content type support for log inserter api * chore: introduce pipeline crate * chore: update upload pipeline api * chore: fix by pr commit * chore: add some doc for pub fn/struct * chore: some minro fix * chore: add pipeline version support * chore: impl log pipeline version * chore: fix format issue * fix: make the LogicalPlan of a query pipeline sorted in desc order * chore: remove some debug log * chore: replacing hashmap cache with moak * chore: fix by pr commit * chore: fix toml format issue * chore: update Cargo.lock * chore: fix by pr commit * chore: fix some issue by pr commit * chore: add more doc for pipeline version --------- Co-authored-by: shuiyisong <xixing.sys@gmail.com>
This commit is contained in:
@@ -66,6 +66,22 @@ pub struct Inserter {
|
||||
|
||||
pub type InserterRef = Arc<Inserter>;
|
||||
|
||||
enum AutoCreateTableType {
|
||||
Logical(String),
|
||||
Physical,
|
||||
Log,
|
||||
}
|
||||
|
||||
impl AutoCreateTableType {
|
||||
fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
AutoCreateTableType::Logical(_) => "logical",
|
||||
AutoCreateTableType::Physical => "physical",
|
||||
AutoCreateTableType::Log => "log",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Inserter {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
@@ -108,7 +124,42 @@ impl Inserter {
|
||||
validate_column_count_match(&requests)?;
|
||||
|
||||
let table_name_to_ids = self
|
||||
.create_or_alter_tables_on_demand(&requests, &ctx, None, statement_executor)
|
||||
.create_or_alter_tables_on_demand(
|
||||
&requests,
|
||||
&ctx,
|
||||
AutoCreateTableType::Physical,
|
||||
statement_executor,
|
||||
)
|
||||
.await?;
|
||||
let inserts = RowToRegion::new(table_name_to_ids, self.partition_manager.as_ref())
|
||||
.convert(requests)
|
||||
.await?;
|
||||
|
||||
self.do_request(inserts, &ctx).await
|
||||
}
|
||||
|
||||
pub async fn handle_log_inserts(
|
||||
&self,
|
||||
mut requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Output> {
|
||||
// remove empty requests
|
||||
requests.inserts.retain(|req| {
|
||||
req.rows
|
||||
.as_ref()
|
||||
.map(|r| !r.rows.is_empty())
|
||||
.unwrap_or_default()
|
||||
});
|
||||
validate_column_count_match(&requests)?;
|
||||
|
||||
let table_name_to_ids = self
|
||||
.create_or_alter_tables_on_demand(
|
||||
&requests,
|
||||
&ctx,
|
||||
AutoCreateTableType::Log,
|
||||
statement_executor,
|
||||
)
|
||||
.await?;
|
||||
let inserts = RowToRegion::new(table_name_to_ids, self.partition_manager.as_ref())
|
||||
.convert(requests)
|
||||
@@ -143,7 +194,7 @@ impl Inserter {
|
||||
.create_or_alter_tables_on_demand(
|
||||
&requests,
|
||||
&ctx,
|
||||
Some(physical_table.to_string()),
|
||||
AutoCreateTableType::Logical(physical_table.to_string()),
|
||||
statement_executor,
|
||||
)
|
||||
.await?;
|
||||
@@ -380,12 +431,15 @@ impl Inserter {
|
||||
&self,
|
||||
requests: &RowInsertRequests,
|
||||
ctx: &QueryContextRef,
|
||||
on_physical_table: Option<String>,
|
||||
auto_create_table_type: AutoCreateTableType,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<HashMap<String, TableId>> {
|
||||
let mut table_name_to_ids = HashMap::with_capacity(requests.inserts.len());
|
||||
let mut create_tables = vec![];
|
||||
let mut alter_tables = vec![];
|
||||
let _timer = crate::metrics::CREATE_ALTER_ON_DEMAND
|
||||
.with_label_values(&[auto_create_table_type.as_str()])
|
||||
.start_timer();
|
||||
for req in &requests.inserts {
|
||||
let catalog = ctx.current_catalog();
|
||||
let schema = ctx.current_schema();
|
||||
@@ -407,42 +461,56 @@ impl Inserter {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(on_physical_table) = on_physical_table {
|
||||
if !create_tables.is_empty() {
|
||||
// Creates logical tables in batch.
|
||||
let tables = self
|
||||
.create_logical_tables(
|
||||
create_tables,
|
||||
ctx,
|
||||
&on_physical_table,
|
||||
statement_executor,
|
||||
)
|
||||
.await?;
|
||||
match auto_create_table_type {
|
||||
AutoCreateTableType::Logical(on_physical_table) => {
|
||||
if !create_tables.is_empty() {
|
||||
// Creates logical tables in batch.
|
||||
let tables = self
|
||||
.create_logical_tables(
|
||||
create_tables,
|
||||
ctx,
|
||||
&on_physical_table,
|
||||
statement_executor,
|
||||
)
|
||||
.await?;
|
||||
|
||||
for table in tables {
|
||||
for table in tables {
|
||||
let table_info = table.table_info();
|
||||
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
|
||||
}
|
||||
}
|
||||
if !alter_tables.is_empty() {
|
||||
// Alter logical tables in batch.
|
||||
statement_executor
|
||||
.alter_logical_tables(alter_tables, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
AutoCreateTableType::Physical => {
|
||||
for req in create_tables {
|
||||
let table = self.create_table(req, ctx, statement_executor).await?;
|
||||
let table_info = table.table_info();
|
||||
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
|
||||
}
|
||||
for alter_expr in alter_tables.into_iter() {
|
||||
statement_executor
|
||||
.alter_table_inner(alter_expr, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
if !alter_tables.is_empty() {
|
||||
// Alter logical tables in batch.
|
||||
statement_executor
|
||||
.alter_logical_tables(alter_tables, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
} else {
|
||||
for req in create_tables {
|
||||
let table = self.create_table(req, ctx, statement_executor).await?;
|
||||
let table_info = table.table_info();
|
||||
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
|
||||
}
|
||||
for alter_expr in alter_tables.into_iter() {
|
||||
statement_executor
|
||||
.alter_table_inner(alter_expr, ctx.clone())
|
||||
.await?;
|
||||
AutoCreateTableType::Log => {
|
||||
for req in create_tables {
|
||||
let table = self.create_log_table(req, ctx, statement_executor).await?;
|
||||
let table_info = table.table_info();
|
||||
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
|
||||
}
|
||||
for alter_expr in alter_tables.into_iter() {
|
||||
statement_executor
|
||||
.alter_table_inner(alter_expr, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(table_name_to_ids)
|
||||
}
|
||||
|
||||
@@ -568,17 +636,45 @@ impl Inserter {
|
||||
|
||||
match res {
|
||||
Ok(table) => {
|
||||
info!(
|
||||
"Successfully created table {}.{}.{}",
|
||||
table_ref.catalog, table_ref.schema, table_ref.table,
|
||||
);
|
||||
info!("Successfully created table {}", table_ref,);
|
||||
Ok(table)
|
||||
}
|
||||
Err(err) => {
|
||||
error!(
|
||||
"Failed to create table {}.{}.{}: {}",
|
||||
table_ref.catalog, table_ref.schema, table_ref.table, err
|
||||
);
|
||||
error!(err; "Failed to create table {}", table_ref);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_log_table(
|
||||
&self,
|
||||
req: &RowInsertRequest,
|
||||
ctx: &QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<TableRef> {
|
||||
let table_ref =
|
||||
TableReference::full(ctx.current_catalog(), ctx.current_schema(), &req.table_name);
|
||||
// SAFETY: `req.rows` is guaranteed to be `Some` by `handle_log_inserts`.
|
||||
let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
|
||||
let create_table_expr = &mut build_create_table_expr(&table_ref, request_schema)?;
|
||||
|
||||
info!("Table `{table_ref}` does not exist, try creating the log table");
|
||||
// Set append_mode to true for log table.
|
||||
// because log tables should keep rows with the same ts and tags.
|
||||
create_table_expr
|
||||
.table_options
|
||||
.insert("append_mode".to_string(), "true".to_string());
|
||||
let res = statement_executor
|
||||
.create_table_inner(create_table_expr, None, ctx.clone())
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(table) => {
|
||||
info!("Successfully created a log table {}", table_ref);
|
||||
Ok(table)
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to create a log table {}", table_ref);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,4 +51,10 @@ lazy_static! {
|
||||
"DDL operator create view"
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref CREATE_ALTER_ON_DEMAND: HistogramVec = register_histogram_vec!(
|
||||
"greptime_table_operator_create_alter_on_demand",
|
||||
"table operator duration to create or alter tables on demand",
|
||||
&["table_type"]
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
@@ -73,6 +73,8 @@ pub struct StatementExecutor {
|
||||
inserter: InserterRef,
|
||||
}
|
||||
|
||||
pub type StatementExecutorRef = Arc<StatementExecutor>;
|
||||
|
||||
impl StatementExecutor {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
|
||||
Reference in New Issue
Block a user