feat: log ingestion support (#4014)

* chore: add log http ingester scaffold

* chore: add some example code

* chore: add log inserter

* chore: add log handler file

* chore: add pipeline lib

* chore: import log handler

* chore: add pipeline http handler

* chore: add pipeline private table

* chore: add pipeline API

* chore: improve error handling

* chore: merge main

* chore: add multi content type support for log handler

* refactor: remove servers dep on pipeline

* refactor: move define_into_tonic_status to common-error

* refactor: bring in pipeline 3eb890c551b8d7f60c4491fcfec18966e2b210a4

* chore: fix typo

* refactor: bring in pipeline a95c9767d7056ab01dd8ca5fa1214456c6ffc72c

* chore: fix typo and license header

* refactor: move http event handler to a separate file

* chore: add test for pipeline

* chore: fmt

* refactor: bring in pipeline 7d2402701877901871dd1294a65ac937605a6a93

* refactor: move `pipeline_operator` to `pipeline` crate

* chore: minor update

* refactor: bring in pipeline 1711f4d46687bada72426d88cda417899e0ae3a4

* chore: add log

* chore: add log

* chore: remove open hook

* chore: minor update

* chore: fix fmt

* chore: minor update

* chore: rename desc for pipeline table

* refactor: remove updated_at in pipelines

* chore: add more content type support for log inserter api

* chore: introduce pipeline crate

* chore: update upload pipeline api

* chore: fix by pr commit

* chore: add some doc for pub fn/struct

* chore: some minor fix

* chore: add pipeline version support

* chore: impl log pipeline version

* chore: fix format issue

* fix: make the LogicalPlan of a query pipeline sorted in desc order

* chore: remove some debug log

* chore: replacing hashmap cache with moka

* chore: fix by pr commit

* chore: fix toml format issue

* chore: update Cargo.lock

* chore: fix by pr commit

* chore: fix some issue by pr commit

* chore: add more doc for pipeline version

---------

Co-authored-by: shuiyisong <xixing.sys@gmail.com>
This commit is contained in:
localhost
2024-06-15 01:03:30 +08:00
committed by GitHub
parent bf3ad44584
commit 01e3a24cf7
22 changed files with 1613 additions and 52 deletions

View File

@@ -44,6 +44,7 @@ meta-client.workspace = true
opentelemetry-proto.workspace = true
operator.workspace = true
partition.workspace = true
pipeline.workspace = true
prometheus.workspace = true
prost.workspace = true
query.workspace = true
@@ -62,11 +63,13 @@ toml.workspace = true
tonic.workspace = true
[dev-dependencies]
catalog.workspace = true
catalog = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
datanode.workspace = true
datatypes.workspace = true
futures = "0.3"
meta-srv = { workspace = true, features = ["mock"] }
serde_json.workspace = true
strfmt = "0.2"
tower.workspace = true
uuid.workspace = true

View File

@@ -15,6 +15,7 @@
pub mod builder;
mod grpc;
mod influxdb;
mod log_handler;
mod opentsdb;
mod otlp;
mod prom_store;
@@ -48,6 +49,7 @@ use meta_client::MetaClientOptions;
use operator::delete::DeleterRef;
use operator::insert::InserterRef;
use operator::statement::StatementExecutor;
use pipeline::pipeline_operator::PipelineOperator;
use prometheus::HistogramTimer;
use query::metrics::OnDone;
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
@@ -66,7 +68,7 @@ use servers::prometheus_handler::PrometheusHandler;
use servers::query_handler::grpc::GrpcQueryHandler;
use servers::query_handler::sql::SqlQueryHandler;
use servers::query_handler::{
InfluxdbLineProtocolHandler, OpenTelemetryProtocolHandler, OpentsdbProtocolHandler,
InfluxdbLineProtocolHandler, LogHandler, OpenTelemetryProtocolHandler, OpentsdbProtocolHandler,
PromStoreProtocolHandler, ScriptHandler,
};
use servers::server::ServerHandlers;
@@ -100,6 +102,7 @@ pub trait FrontendInstance:
+ OpenTelemetryProtocolHandler
+ ScriptHandler
+ PrometheusHandler
+ LogHandler
+ Send
+ Sync
+ 'static
@@ -108,12 +111,12 @@ pub trait FrontendInstance:
}
pub type FrontendInstanceRef = Arc<dyn FrontendInstance>;
pub type StatementExecutorRef = Arc<StatementExecutor>;
#[derive(Clone)]
pub struct Instance {
catalog_manager: CatalogManagerRef,
script_executor: Arc<ScriptExecutor>,
pipeline_operator: Arc<PipelineOperator>,
statement_executor: Arc<StatementExecutor>,
query_engine: QueryEngineRef,
plugins: Plugins,

View File

@@ -27,9 +27,10 @@ use operator::delete::Deleter;
use operator::insert::Inserter;
use operator::procedure::ProcedureServiceOperator;
use operator::request::Requester;
use operator::statement::StatementExecutor;
use operator::statement::{StatementExecutor, StatementExecutorRef};
use operator::table::TableMutationOperator;
use partition::manager::PartitionRuleManager;
use pipeline::pipeline_operator::PipelineOperator;
use query::QueryEngineFactory;
use servers::server::ServerHandlers;
use snafu::OptionExt;
@@ -37,7 +38,7 @@ use snafu::OptionExt;
use crate::error::{self, Result};
use crate::heartbeat::HeartbeatTask;
use crate::instance::region_query::FrontendRegionQueryHandler;
use crate::instance::{Instance, StatementExecutorRef};
use crate::instance::Instance;
use crate::script::ScriptExecutor;
/// The frontend [`Instance`] builder.
@@ -172,11 +173,19 @@ impl FrontendBuilder {
table_route_cache,
));
let pipeline_operator = Arc::new(PipelineOperator::new(
inserter.clone(),
statement_executor.clone(),
self.catalog_manager.clone(),
query_engine.clone(),
));
plugins.insert::<StatementExecutorRef>(statement_executor.clone());
Ok(Instance {
catalog_manager: self.catalog_manager,
script_executor,
pipeline_operator,
statement_executor,
query_engine,
plugins,

View File

@@ -0,0 +1,93 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::RowInsertRequests;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use client::Output;
use common_error::ext::BoxedError;
use pipeline::table::PipelineVersion;
use pipeline::{GreptimeTransformer, Pipeline};
use servers::error::{
AuthSnafu, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult,
UnsupportedDeletePipelineSnafu,
};
use servers::query_handler::LogHandler;
use session::context::QueryContextRef;
use snafu::ResultExt;
use crate::instance::Instance;
#[async_trait]
/// [`LogHandler`] implementation for the frontend [`Instance`].
///
/// Log writes are permission-checked and then forwarded to
/// [`Instance::handle_log_inserts`]; pipeline lookups and uploads are
/// delegated to the instance's `pipeline_operator`.
impl LogHandler for Instance {
    /// Inserts the rows in `log` on behalf of the current user of `ctx`.
    ///
    /// The `PermissionCheckerRef` lookup result from `plugins` is asked for
    /// [`PermissionReq::LogWrite`] before anything is written.
    ///
    /// # Errors
    ///
    /// A failed permission check is reported through `AuthSnafu`; any error
    /// from [`Instance::handle_log_inserts`] is returned unchanged.
    async fn insert_logs(
        &self,
        log: RowInsertRequests,
        ctx: QueryContextRef,
    ) -> ServerResult<Output> {
        {
            // Scoped so the looked-up checker is released before the insert is awaited.
            let checker = self.plugins.get::<PermissionCheckerRef>();
            checker
                .as_ref()
                .check_permission(ctx.current_user(), PermissionReq::LogWrite)
                .context(AuthSnafu)?;
        }

        self.handle_log_inserts(log, ctx).await
    }

    /// Fetches the pipeline called `name` at `version` through the pipeline operator.
    ///
    /// # Errors
    ///
    /// Operator failures are wrapped with `PipelineSnafu`.
    async fn get_pipeline(
        &self,
        name: &str,
        version: PipelineVersion,
        query_ctx: QueryContextRef,
    ) -> ServerResult<Arc<Pipeline<GreptimeTransformer>>> {
        let fetched = self
            .pipeline_operator
            .get_pipeline(query_ctx, name, version)
            .await;

        fetched.context(PipelineSnafu)
    }

    /// Stores the pipeline definition `pipeline` (declared as `content_type`)
    /// under `name` through the pipeline operator.
    ///
    /// # Errors
    ///
    /// Operator failures are wrapped with `PipelineSnafu`.
    async fn insert_pipeline(
        &self,
        name: &str,
        content_type: &str,
        pipeline: &str,
        query_ctx: QueryContextRef,
    ) -> ServerResult<()> {
        let stored = self
            .pipeline_operator
            .insert_pipeline(name, content_type, pipeline, query_ctx)
            .await;

        stored.context(PipelineSnafu)
    }

    /// Pipeline deletion is not implemented yet.
    ///
    /// # Errors
    ///
    /// Always returns the error built from `UnsupportedDeletePipelineSnafu`.
    async fn delete_pipeline(&self, _name: &str, _query_ctx: QueryContextRef) -> ServerResult<()> {
        // TODO(qtang): impl delete
        UnsupportedDeletePipelineSnafu {}.fail()
    }
}
impl Instance {
    /// Hands `log` to the instance's inserter, passing along the query
    /// context and a reference to the statement executor.
    ///
    /// # Errors
    ///
    /// Any inserter error is boxed into a [`BoxedError`] and wrapped with
    /// `ExecuteGrpcRequestSnafu`.
    pub async fn handle_log_inserts(
        &self,
        log: RowInsertRequests,
        ctx: QueryContextRef,
    ) -> ServerResult<Output> {
        let executor = self.statement_executor.as_ref();
        let inserted = self.inserter.handle_log_inserts(log, ctx, executor).await;

        inserted
            .map_err(BoxedError::new)
            .context(ExecuteGrpcRequestSnafu)
    }
}

View File

@@ -90,6 +90,8 @@ where
Some(self.instance.clone()),
);
builder = builder.with_log_ingest_handler(self.instance.clone());
if let Some(user_provider) = self.plugins.get::<UserProviderRef>() {
builder = builder.with_user_provider(user_provider);
}