mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-29 19:30:37 +00:00
feat: log ingestion support (#4014)
* chore: add log http ingester scaffold * chore: add some example code * chore: add log inserter * chore: add log handler file * chore: add pipeline lib * chore: import log handler * chore: add pipelime http handler * chore: add pipeline private table * chore: add pipeline API * chore: improve error handling * chore: merge main * chore: add multi content type support for log handler * refactor: remove servers dep on pipeline * refactor: move define_into_tonic_status to common-error * refactor: bring in pipeline 3eb890c551b8d7f60c4491fcfec18966e2b210a4 * chore: fix typo * refactor: bring in pipeline a95c9767d7056ab01dd8ca5fa1214456c6ffc72c * chore: fix typo and license header * refactor: move http event handler to a separate file * chore: add test for pipeline * chore: fmt * refactor: bring in pipeline 7d2402701877901871dd1294a65ac937605a6a93 * refactor: move `pipeline_operator` to `pipeline` crate * chore: minor update * refactor: bring in pipeline 1711f4d46687bada72426d88cda417899e0ae3a4 * chore: add log * chore: add log * chore: remove open hook * chore: minor update * chore: fix fmt * chore: minor update * chore: rename desc for pipeline table * refactor: remove updated_at in pipelines * chore: add more content type support for log inserter api * chore: introduce pipeline crate * chore: update upload pipeline api * chore: fix by pr commit * chore: add some doc for pub fn/struct * chore: some minro fix * chore: add pipeline version support * chore: impl log pipeline version * chore: fix format issue * fix: make the LogicalPlan of a query pipeline sorted in desc order * chore: remove some debug log * chore: replacing hashmap cache with moak * chore: fix by pr commit * chore: fix toml format issue * chore: update Cargo.lock * chore: fix by pr commit * chore: fix some issue by pr commit * chore: add more doc for pipeline version --------- Co-authored-by: shuiyisong <xixing.sys@gmail.com>
This commit is contained in:
@@ -22,7 +22,7 @@ arrow-ipc.workspace = true
|
||||
arrow-schema.workspace = true
|
||||
async-trait = "0.1"
|
||||
auth.workspace = true
|
||||
axum.workspace = true
|
||||
axum = { workspace = true, features = ["multipart"] }
|
||||
axum-macros = "0.3.8"
|
||||
base64.workspace = true
|
||||
bytes.workspace = true
|
||||
@@ -69,6 +69,7 @@ opentelemetry-proto.workspace = true
|
||||
parking_lot = "0.12"
|
||||
pgwire = "0.20"
|
||||
pin-project = "1.0"
|
||||
pipeline.workspace = true
|
||||
postgres-types = { version = "0.2", features = ["with-chrono-0_4"] }
|
||||
pprof = { version = "0.13", features = [
|
||||
"flamegraph",
|
||||
|
||||
@@ -27,6 +27,7 @@ use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_telemetry::{debug, error};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use headers::ContentType;
|
||||
use query::parser::PromQuery;
|
||||
use serde_json::json;
|
||||
use snafu::{Location, Snafu};
|
||||
@@ -148,6 +149,19 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Pipeline management api error"))]
|
||||
Pipeline {
|
||||
source: pipeline::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported delete pipeline."))]
|
||||
UnsupportedDeletePipeline {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to execute script by name: {}", name))]
|
||||
ExecuteScript {
|
||||
name: String,
|
||||
@@ -533,6 +547,27 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse payload as json"))]
|
||||
ParseJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert to structured log"))]
|
||||
ToStructuredLog {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported content type: {:?}", content_type))]
|
||||
UnsupportedContentType {
|
||||
content_type: ContentType,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode url"))]
|
||||
UrlDecode {
|
||||
#[snafu(source)]
|
||||
@@ -600,6 +635,7 @@ impl ErrorExt for Error {
|
||||
| FileWatch { .. } => StatusCode::Internal,
|
||||
|
||||
UnsupportedDataType { .. } => StatusCode::Unsupported,
|
||||
UnsupportedDeletePipeline { .. } => StatusCode::Unsupported,
|
||||
|
||||
#[cfg(not(windows))]
|
||||
UpdateJemallocMetrics { .. } => StatusCode::Internal,
|
||||
@@ -614,6 +650,8 @@ impl ErrorExt for Error {
|
||||
| ExecuteGrpcRequest { source, .. }
|
||||
| CheckDatabaseValidity { source, .. } => source.status_code(),
|
||||
|
||||
Pipeline { source, .. } => source.status_code(),
|
||||
|
||||
NotSupported { .. }
|
||||
| InvalidParameter { .. }
|
||||
| InvalidQuery { .. }
|
||||
@@ -637,6 +675,9 @@ impl ErrorExt for Error {
|
||||
| MissingQueryContext { .. }
|
||||
| MysqlValueConversion { .. }
|
||||
| UnexpectedPhysicalTable { .. }
|
||||
| ParseJson { .. }
|
||||
| ToStructuredLog { .. }
|
||||
| UnsupportedContentType { .. }
|
||||
| TimestampOverflow { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
RowWriter { source, .. }
|
||||
|
||||
@@ -67,12 +67,13 @@ use crate::metrics_handler::MetricsHandler;
|
||||
use crate::prometheus_handler::PrometheusHandlerRef;
|
||||
use crate::query_handler::sql::ServerSqlQueryHandlerRef;
|
||||
use crate::query_handler::{
|
||||
InfluxdbLineProtocolHandlerRef, OpenTelemetryProtocolHandlerRef, OpentsdbProtocolHandlerRef,
|
||||
PromStoreProtocolHandlerRef, ScriptHandlerRef,
|
||||
InfluxdbLineProtocolHandlerRef, LogHandlerRef, OpenTelemetryProtocolHandlerRef,
|
||||
OpentsdbProtocolHandlerRef, PromStoreProtocolHandlerRef, ScriptHandlerRef,
|
||||
};
|
||||
use crate::server::Server;
|
||||
|
||||
pub mod authorize;
|
||||
pub mod event;
|
||||
pub mod handler;
|
||||
pub mod header;
|
||||
pub mod influxdb;
|
||||
@@ -587,6 +588,16 @@ impl HttpServerBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_log_ingest_handler(self, handler: LogHandlerRef) -> Self {
|
||||
Self {
|
||||
router: self.router.nest(
|
||||
&format!("/{HTTP_API_VERSION}/events"),
|
||||
HttpServer::route_log(handler),
|
||||
),
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_plugins(self, plugins: Plugins) -> Self {
|
||||
Self { plugins, ..self }
|
||||
}
|
||||
@@ -699,6 +710,21 @@ impl HttpServer {
|
||||
.with_state(metrics_handler)
|
||||
}
|
||||
|
||||
fn route_log<S>(log_handler: LogHandlerRef) -> Router<S> {
|
||||
Router::new()
|
||||
.route("/logs", routing::post(event::log_ingester))
|
||||
.route(
|
||||
"/pipelines/:pipeline_name",
|
||||
routing::post(event::add_pipeline),
|
||||
)
|
||||
.layer(
|
||||
ServiceBuilder::new()
|
||||
.layer(HandleErrorLayer::new(handle_error))
|
||||
.layer(RequestDecompressionLayer::new()),
|
||||
)
|
||||
.with_state(log_handler)
|
||||
}
|
||||
|
||||
fn route_sql<S>(api_state: ApiState) -> ApiRouter<S> {
|
||||
ApiRouter::new()
|
||||
.api_route(
|
||||
|
||||
257
src/servers/src/http/event.rs
Normal file
257
src/servers/src/http/event.rs
Normal file
@@ -0,0 +1,257 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
|
||||
use axum::body::HttpBody;
|
||||
use axum::extract::{FromRequest, Multipart, Path, Query, State};
|
||||
use axum::headers::ContentType;
|
||||
use axum::http::header::CONTENT_TYPE;
|
||||
use axum::http::{Request, StatusCode};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::{async_trait, BoxError, Extension, TypedHeader};
|
||||
use common_telemetry::{error, warn};
|
||||
use common_time::Timestamp;
|
||||
use datatypes::timestamp::TimestampNanosecond;
|
||||
use mime_guess::mime;
|
||||
use pipeline::error::{CastTypeSnafu, PipelineTransformSnafu};
|
||||
use pipeline::table::PipelineVersion;
|
||||
use pipeline::Value as PipelineValue;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Deserializer, Value};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu, Result, UnsupportedContentTypeSnafu,
|
||||
};
|
||||
use crate::http::greptime_result_v1::GreptimedbV1Response;
|
||||
use crate::http::HttpResponse;
|
||||
use crate::query_handler::LogHandlerRef;
|
||||
|
||||
/// Query-string parameters accepted by the `log_ingester` HTTP handler.
///
/// Every field is optional at the deserialization level; `log_ingester`
/// itself enforces which ones are mandatory.
#[derive(Debug, Default, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct LogIngesterQueryParams {
|
||||
/// Name of the table the transformed rows are inserted into.
/// `log_ingester` rejects the request when this is absent.
pub table: Option<String>,
|
||||
/// Not read by the handler code in this file.
/// NOTE(review): presumably consumed elsewhere (e.g. when the query
/// context is built) - confirm.
pub db: Option<String>,
|
||||
/// Name of the pipeline used to transform the payload.
/// `log_ingester` rejects the request when this is absent.
pub pipeline_name: Option<String>,
|
||||
/// When `true`, items that fail to parse or are neither JSON objects nor
/// arrays are skipped instead of failing the request. Treated as `false`
/// when absent.
pub ignore_errors: Option<bool>,
|
||||
|
||||
/// Optional pipeline version. `log_ingester` parses it with
/// `Timestamp::from_str_utc` and rejects the request if parsing fails.
pub version: Option<String>,
|
||||
}
|
||||
|
||||
pub struct PipelineContent(String);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, B> FromRequest<S, B> for PipelineContent
|
||||
where
|
||||
B: HttpBody + Send + 'static,
|
||||
B::Data: Send,
|
||||
bytes::Bytes: std::convert::From<<B as HttpBody>::Data>,
|
||||
B::Error: Into<BoxError>,
|
||||
S: Send + Sync,
|
||||
{
|
||||
type Rejection = Response;
|
||||
|
||||
async fn from_request(req: Request<B>, state: &S) -> StdResult<Self, Self::Rejection> {
|
||||
let content_type_header = req.headers().get(CONTENT_TYPE);
|
||||
let content_type = content_type_header.and_then(|value| value.to_str().ok());
|
||||
if let Some(content_type) = content_type {
|
||||
if content_type.ends_with("yaml") {
|
||||
let payload = String::from_request(req, state)
|
||||
.await
|
||||
.map_err(IntoResponse::into_response)?;
|
||||
return Ok(Self(payload));
|
||||
}
|
||||
|
||||
if content_type.starts_with("multipart/form-data") {
|
||||
let mut payload: Multipart = Multipart::from_request(req, state)
|
||||
.await
|
||||
.map_err(IntoResponse::into_response)?;
|
||||
let file = payload
|
||||
.next_field()
|
||||
.await
|
||||
.map_err(IntoResponse::into_response)?;
|
||||
let payload = file
|
||||
.ok_or(StatusCode::UNSUPPORTED_MEDIA_TYPE.into_response())?
|
||||
.text()
|
||||
.await
|
||||
.map_err(IntoResponse::into_response)?;
|
||||
return Ok(Self(payload));
|
||||
}
|
||||
}
|
||||
|
||||
Err(StatusCode::UNSUPPORTED_MEDIA_TYPE.into_response())
|
||||
}
|
||||
}
|
||||
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn add_pipeline(
|
||||
State(handler): State<LogHandlerRef>,
|
||||
Path(pipeline_name): Path<String>,
|
||||
Extension(query_ctx): Extension<QueryContextRef>,
|
||||
PipelineContent(payload): PipelineContent,
|
||||
) -> Result<String> {
|
||||
if pipeline_name.is_empty() {
|
||||
return Err(InvalidParameterSnafu {
|
||||
reason: "pipeline_name is required in path",
|
||||
}
|
||||
.build());
|
||||
}
|
||||
|
||||
if payload.is_empty() {
|
||||
return Err(InvalidParameterSnafu {
|
||||
reason: "pipeline is required in body",
|
||||
}
|
||||
.build());
|
||||
}
|
||||
|
||||
let content_type = "yaml";
|
||||
let result = handler
|
||||
.insert_pipeline(&pipeline_name, content_type, &payload, query_ctx)
|
||||
.await;
|
||||
|
||||
result.map(|_| "ok".to_string()).map_err(|e| {
|
||||
error!(e; "failed to insert pipeline");
|
||||
e
|
||||
})
|
||||
}
|
||||
|
||||
/// Transform NDJSON array into a single array
|
||||
fn transform_ndjson_array_factory(
|
||||
values: impl IntoIterator<Item = StdResult<Value, serde_json::Error>>,
|
||||
ignore_error: bool,
|
||||
) -> Result<Value> {
|
||||
values.into_iter().try_fold(
|
||||
Value::Array(Vec::with_capacity(100)),
|
||||
|acc, item| match acc {
|
||||
Value::Array(mut acc_array) => {
|
||||
if let Ok(item_value) = item {
|
||||
match item_value {
|
||||
Value::Array(item_array) => {
|
||||
acc_array.extend(item_array);
|
||||
}
|
||||
Value::Object(_) => {
|
||||
acc_array.push(item_value);
|
||||
}
|
||||
_ => {
|
||||
if !ignore_error {
|
||||
warn!("invalid item in array: {:?}", item_value);
|
||||
return InvalidParameterSnafu {
|
||||
reason: format!("invalid item:{} in array", item_value),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Value::Array(acc_array))
|
||||
} else if !ignore_error {
|
||||
item.context(ParseJsonSnafu)
|
||||
} else {
|
||||
warn!("invalid item in array: {:?}", item);
|
||||
Ok(Value::Array(acc_array))
|
||||
}
|
||||
}
|
||||
_ => unreachable!("invalid acc: {:?}", acc),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn log_ingester(
|
||||
State(handler): State<LogHandlerRef>,
|
||||
Query(query_params): Query<LogIngesterQueryParams>,
|
||||
Extension(query_ctx): Extension<QueryContextRef>,
|
||||
TypedHeader(content_type): TypedHeader<ContentType>,
|
||||
payload: String,
|
||||
) -> Result<HttpResponse> {
|
||||
let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu {
|
||||
reason: "pipeline_name is required",
|
||||
})?;
|
||||
let table_name = query_params.table.context(InvalidParameterSnafu {
|
||||
reason: "table is required",
|
||||
})?;
|
||||
|
||||
let version = match query_params.version {
|
||||
Some(version) => {
|
||||
let ts = Timestamp::from_str_utc(&version).map_err(|e| {
|
||||
InvalidParameterSnafu {
|
||||
reason: format!("invalid pipeline version: {} with error: {}", &version, e),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
Some(TimestampNanosecond(ts))
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let ignore_errors = query_params.ignore_errors.unwrap_or(false);
|
||||
|
||||
let m: mime::Mime = content_type.clone().into();
|
||||
let value = match m.subtype() {
|
||||
mime::JSON => transform_ndjson_array_factory(
|
||||
Deserializer::from_str(&payload).into_iter(),
|
||||
ignore_errors,
|
||||
)?,
|
||||
// add more content type support
|
||||
_ => UnsupportedContentTypeSnafu { content_type }.fail()?,
|
||||
};
|
||||
|
||||
ingest_logs_inner(
|
||||
handler,
|
||||
pipeline_name,
|
||||
version,
|
||||
table_name,
|
||||
value,
|
||||
query_ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn ingest_logs_inner(
|
||||
state: LogHandlerRef,
|
||||
pipeline_name: String,
|
||||
version: PipelineVersion,
|
||||
table_name: String,
|
||||
payload: Value,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<HttpResponse> {
|
||||
let start = std::time::Instant::now();
|
||||
let pipeline_data = PipelineValue::try_from(payload)
|
||||
.map_err(|reason| CastTypeSnafu { msg: reason }.build())
|
||||
.context(PipelineSnafu)?;
|
||||
|
||||
let pipeline = state
|
||||
.get_pipeline(&pipeline_name, version, query_ctx.clone())
|
||||
.await?;
|
||||
let transformed_data: Rows = pipeline
|
||||
.exec(pipeline_data)
|
||||
.map_err(|reason| PipelineTransformSnafu { reason }.build())
|
||||
.context(PipelineSnafu)?;
|
||||
|
||||
let insert_request = RowInsertRequest {
|
||||
rows: Some(transformed_data),
|
||||
table_name: table_name.clone(),
|
||||
};
|
||||
let insert_requests = RowInsertRequests {
|
||||
inserts: vec![insert_request],
|
||||
};
|
||||
let output = state.insert_logs(insert_requests, query_ctx).await;
|
||||
|
||||
let response = GreptimedbV1Response::from_output(vec![output])
|
||||
.await
|
||||
.with_execution_time(start.elapsed().as_millis() as u64);
|
||||
Ok(response)
|
||||
}
|
||||
@@ -35,6 +35,8 @@ use common_query::Output;
|
||||
use headers::HeaderValue;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use pipeline::table::PipelineVersion;
|
||||
use pipeline::{GreptimeTransformer, Pipeline};
|
||||
use serde_json::Value;
|
||||
use session::context::QueryContextRef;
|
||||
|
||||
@@ -48,6 +50,7 @@ pub type InfluxdbLineProtocolHandlerRef = Arc<dyn InfluxdbLineProtocolHandler +
|
||||
pub type PromStoreProtocolHandlerRef = Arc<dyn PromStoreProtocolHandler + Send + Sync>;
|
||||
pub type OpenTelemetryProtocolHandlerRef = Arc<dyn OpenTelemetryProtocolHandler + Send + Sync>;
|
||||
pub type ScriptHandlerRef = Arc<dyn ScriptHandler + Send + Sync>;
|
||||
/// Reference-counted handle to a [`LogHandler`] implementation that is `Send + Sync`.
pub type LogHandlerRef = Arc<dyn LogHandler + Send + Sync>;
|
||||
|
||||
#[async_trait]
|
||||
pub trait ScriptHandler {
|
||||
@@ -118,3 +121,29 @@ pub trait OpenTelemetryProtocolHandler {
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<Output>;
|
||||
}
|
||||
|
||||
/// LogHandler is responsible for handling log related requests.
|
||||
/// It should be able to insert logs and manage pipelines.
|
||||
/// The pipeline is a series of transformations that can be applied to logs.
|
||||
/// The pipeline is stored in the database and can be retrieved by name.
|
||||
#[async_trait]
|
||||
pub trait LogHandler {
|
||||
/// Inserts the given row insert requests and returns the resulting `Output`.
///
/// The HTTP log ingester calls this with the rows produced by executing a
/// pipeline on the request payload.
async fn insert_logs(&self, log: RowInsertRequests, ctx: QueryContextRef) -> Result<Output>;
|
||||
|
||||
/// Retrieves the pipeline registered under `name` for the given `version`.
///
/// NOTE(review): a `None` version presumably selects the most recent
/// pipeline - confirm against the implementation.
async fn get_pipeline(
|
||||
&self,
|
||||
name: &str,
|
||||
version: PipelineVersion,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Arc<Pipeline<GreptimeTransformer>>>;
|
||||
|
||||
/// Registers the pipeline definition `pipeline` under `name`.
///
/// `content_type` names the format of `pipeline`; the HTTP upload handler
/// always passes `"yaml"`.
async fn insert_pipeline(
|
||||
&self,
|
||||
name: &str,
|
||||
content_type: &str,
|
||||
pipeline: &str,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Removes the pipeline registered under `name`.
///
/// NOTE(review): no caller is visible here, and the servers error enum has
/// an `UnsupportedDeletePipeline` variant - implementations may not
/// support this yet; confirm.
async fn delete_pipeline(&self, name: &str, query_ctx: QueryContextRef) -> Result<()>;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user