feat: log ingestion support (#4014)

* chore: add log http ingester scaffold

* chore: add some example code

* chore: add log inserter

* chore: add log handler file

* chore: add pipeline lib

* chore: import log handler

* chore: add pipeline http handler

* chore: add pipeline private table

* chore: add pipeline API

* chore: improve error handling

* chore: merge main

* chore: add multi content type support for log handler

* refactor: remove servers dep on pipeline

* refactor: move define_into_tonic_status to common-error

* refactor: bring in pipeline 3eb890c551b8d7f60c4491fcfec18966e2b210a4

* chore: fix typo

* refactor: bring in pipeline a95c9767d7056ab01dd8ca5fa1214456c6ffc72c

* chore: fix typo and license header

* refactor: move http event handler to a separate file

* chore: add test for pipeline

* chore: fmt

* refactor: bring in pipeline 7d2402701877901871dd1294a65ac937605a6a93

* refactor: move `pipeline_operator` to `pipeline` crate

* chore: minor update

* refactor: bring in pipeline 1711f4d46687bada72426d88cda417899e0ae3a4

* chore: add log

* chore: add log

* chore: remove open hook

* chore: minor update

* chore: fix fmt

* chore: minor update

* chore: rename desc for pipeline table

* refactor: remove updated_at in pipelines

* chore: add more content type support for log inserter api

* chore: introduce pipeline crate

* chore: update upload pipeline api

* chore: fix by pr commit

* chore: add some doc for pub fn/struct

* chore: some minor fix

* chore: add pipeline version support

* chore: impl log pipeline version

* chore: fix format issue

* fix: make the LogicalPlan of a query pipeline sorted in desc order

* chore: remove some debug log

* chore: replacing hashmap cache with moka

* chore: fix by pr commit

* chore: fix toml format issue

* chore: update Cargo.lock

* chore: fix by pr commit

* chore: fix some issue by pr commit

* chore: add more doc for pipeline version

---------

Co-authored-by: shuiyisong <xixing.sys@gmail.com>
This commit is contained in:
localhost
2024-06-15 01:03:30 +08:00
committed by GitHub
parent bf3ad44584
commit 01e3a24cf7
22 changed files with 1613 additions and 52 deletions

View File

@@ -66,6 +66,22 @@ pub struct Inserter {
pub type InserterRef = Arc<Inserter>;
/// The kind of table to create automatically when an insert targets a
/// table that does not exist yet.
enum AutoCreateTableType {
    /// A logical table; the payload is the name of the physical table it
    /// is created on.
    Logical(String),
    /// A physical table.
    Physical,
    /// A log table.
    Log,
}

impl AutoCreateTableType {
    /// Returns the fixed lowercase name of this table type
    /// (`"logical"`, `"physical"` or `"log"`).
    ///
    /// The payload of [`AutoCreateTableType::Logical`] does not influence
    /// the result.
    fn as_str(&self) -> &'static str {
        match self {
            Self::Logical(..) => "logical",
            Self::Physical => "physical",
            Self::Log => "log",
        }
    }
}
impl Inserter {
pub fn new(
catalog_manager: CatalogManagerRef,
@@ -108,7 +124,42 @@ impl Inserter {
validate_column_count_match(&requests)?;
let table_name_to_ids = self
.create_or_alter_tables_on_demand(&requests, &ctx, None, statement_executor)
.create_or_alter_tables_on_demand(
&requests,
&ctx,
AutoCreateTableType::Physical,
statement_executor,
)
.await?;
let inserts = RowToRegion::new(table_name_to_ids, self.partition_manager.as_ref())
.convert(requests)
.await?;
self.do_request(inserts, &ctx).await
}
pub async fn handle_log_inserts(
&self,
mut requests: RowInsertRequests,
ctx: QueryContextRef,
statement_executor: &StatementExecutor,
) -> Result<Output> {
// remove empty requests
requests.inserts.retain(|req| {
req.rows
.as_ref()
.map(|r| !r.rows.is_empty())
.unwrap_or_default()
});
validate_column_count_match(&requests)?;
let table_name_to_ids = self
.create_or_alter_tables_on_demand(
&requests,
&ctx,
AutoCreateTableType::Log,
statement_executor,
)
.await?;
let inserts = RowToRegion::new(table_name_to_ids, self.partition_manager.as_ref())
.convert(requests)
@@ -143,7 +194,7 @@ impl Inserter {
.create_or_alter_tables_on_demand(
&requests,
&ctx,
Some(physical_table.to_string()),
AutoCreateTableType::Logical(physical_table.to_string()),
statement_executor,
)
.await?;
@@ -380,12 +431,15 @@ impl Inserter {
&self,
requests: &RowInsertRequests,
ctx: &QueryContextRef,
on_physical_table: Option<String>,
auto_create_table_type: AutoCreateTableType,
statement_executor: &StatementExecutor,
) -> Result<HashMap<String, TableId>> {
let mut table_name_to_ids = HashMap::with_capacity(requests.inserts.len());
let mut create_tables = vec![];
let mut alter_tables = vec![];
let _timer = crate::metrics::CREATE_ALTER_ON_DEMAND
.with_label_values(&[auto_create_table_type.as_str()])
.start_timer();
for req in &requests.inserts {
let catalog = ctx.current_catalog();
let schema = ctx.current_schema();
@@ -407,42 +461,56 @@ impl Inserter {
}
}
if let Some(on_physical_table) = on_physical_table {
if !create_tables.is_empty() {
// Creates logical tables in batch.
let tables = self
.create_logical_tables(
create_tables,
ctx,
&on_physical_table,
statement_executor,
)
.await?;
match auto_create_table_type {
AutoCreateTableType::Logical(on_physical_table) => {
if !create_tables.is_empty() {
// Creates logical tables in batch.
let tables = self
.create_logical_tables(
create_tables,
ctx,
&on_physical_table,
statement_executor,
)
.await?;
for table in tables {
for table in tables {
let table_info = table.table_info();
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
}
}
if !alter_tables.is_empty() {
// Alter logical tables in batch.
statement_executor
.alter_logical_tables(alter_tables, ctx.clone())
.await?;
}
}
AutoCreateTableType::Physical => {
for req in create_tables {
let table = self.create_table(req, ctx, statement_executor).await?;
let table_info = table.table_info();
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
}
for alter_expr in alter_tables.into_iter() {
statement_executor
.alter_table_inner(alter_expr, ctx.clone())
.await?;
}
}
if !alter_tables.is_empty() {
// Alter logical tables in batch.
statement_executor
.alter_logical_tables(alter_tables, ctx.clone())
.await?;
}
} else {
for req in create_tables {
let table = self.create_table(req, ctx, statement_executor).await?;
let table_info = table.table_info();
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
}
for alter_expr in alter_tables.into_iter() {
statement_executor
.alter_table_inner(alter_expr, ctx.clone())
.await?;
AutoCreateTableType::Log => {
for req in create_tables {
let table = self.create_log_table(req, ctx, statement_executor).await?;
let table_info = table.table_info();
table_name_to_ids.insert(table_info.name.clone(), table_info.table_id());
}
for alter_expr in alter_tables.into_iter() {
statement_executor
.alter_table_inner(alter_expr, ctx.clone())
.await?;
}
}
}
Ok(table_name_to_ids)
}
@@ -568,17 +636,45 @@ impl Inserter {
match res {
Ok(table) => {
info!(
"Successfully created table {}.{}.{}",
table_ref.catalog, table_ref.schema, table_ref.table,
);
info!("Successfully created table {}", table_ref,);
Ok(table)
}
Err(err) => {
error!(
"Failed to create table {}.{}.{}: {}",
table_ref.catalog, table_ref.schema, table_ref.table, err
);
error!(err; "Failed to create table {}", table_ref);
Err(err)
}
}
}
async fn create_log_table(
&self,
req: &RowInsertRequest,
ctx: &QueryContextRef,
statement_executor: &StatementExecutor,
) -> Result<TableRef> {
let table_ref =
TableReference::full(ctx.current_catalog(), ctx.current_schema(), &req.table_name);
// SAFETY: `req.rows` is guaranteed to be `Some` by `handle_log_inserts`.
let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
let create_table_expr = &mut build_create_table_expr(&table_ref, request_schema)?;
info!("Table `{table_ref}` does not exist, try creating the log table");
// Set append_mode to true for log table.
// because log tables should keep rows with the same ts and tags.
create_table_expr
.table_options
.insert("append_mode".to_string(), "true".to_string());
let res = statement_executor
.create_table_inner(create_table_expr, None, ctx.clone())
.await;
match res {
Ok(table) => {
info!("Successfully created a log table {}", table_ref);
Ok(table)
}
Err(err) => {
error!(err; "Failed to create a log table {}", table_ref);
Err(err)
}
}

View File

@@ -51,4 +51,10 @@ lazy_static! {
"DDL operator create view"
)
.unwrap();
/// Histogram of the time the table operator spends creating or altering
/// tables on demand, partitioned by the `table_type` label.
pub static ref CREATE_ALTER_ON_DEMAND: HistogramVec = register_histogram_vec!(
"greptime_table_operator_create_alter_on_demand",
"table operator duration to create or alter tables on demand",
&["table_type"]
)
.unwrap();
}

View File

@@ -73,6 +73,8 @@ pub struct StatementExecutor {
inserter: InserterRef,
}
pub type StatementExecutorRef = Arc<StatementExecutor>;
impl StatementExecutor {
pub fn new(
catalog_manager: CatalogManagerRef,