1use std::sync::Arc;
16
17use api::helper::ColumnDataTypeWrapper;
18use api::v1::alter_table_expr::Kind;
19use api::v1::{
20 AlterTableExpr, ColumnDataType, ModifyColumnType, ModifyColumnTypes, RowInsertRequests,
21};
22use async_trait::async_trait;
23use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
24use client::Output;
25use common_error::ext::{BoxedError, ErrorExt};
26use common_error::status_code::StatusCode;
27use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
28use common_telemetry::{tracing, warn};
29use itertools::Itertools;
30use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
31use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
32use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
33use pipeline::{GreptimePipelineParams, PipelineWay};
34use servers::error::{self, AuthSnafu, Result as ServerResult};
35use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
36use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
37use servers::otlp;
38use servers::otlp::trace::TraceAuxData;
39use servers::otlp::trace::coerce::{coerce_value_data, trace_value_datatype};
40use servers::otlp::trace::span::{TraceSpan, TraceSpanGroup};
41use servers::query_handler::{
42 OpenTelemetryProtocolHandler, PipelineHandlerRef, TraceIngestOutcome,
43};
44use session::context::QueryContextRef;
45use snafu::{IntoError, ResultExt};
46use table::requests::{OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM};
47
48use crate::instance::Instance;
49use crate::instance::otlp::trace_types::{
50 PendingTraceColumnRewrite, choose_trace_reconcile_decision, enrich_trace_reconcile_error,
51 is_trace_reconcile_candidate_type, push_observed_trace_type, validate_trace_column_rewrites,
52};
53use crate::metrics::{
54 OTLP_LOGS_ROWS, OTLP_METRICS_ROWS, OTLP_TRACES_FAILURE_COUNT, OTLP_TRACES_ROWS,
55};
56
57pub mod trace_types;
58
/// Maximum number of spans converted and written in a single insert request; each span
/// group is split into chunks of at most this many spans before ingestion.
const TRACE_INGEST_CHUNK_SIZE: usize = 64;
/// Maximum number of failure detail messages kept for one ingest call; once this many are
/// recorded, further details are dropped (the failure counter is still incremented).
const TRACE_FAILURE_MESSAGE_LIMIT: usize = 4;
61
/// How the trace ingest path responds when inserting a whole chunk of spans fails.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChunkFailureReaction {
    /// Re-submit the spans of the failed chunk one at a time.
    RetryPerSpan,
    /// Count every span of the chunk as rejected and continue with the next chunk.
    DiscardChunk,
    /// Stop ingesting and hand the error back to the caller.
    Propagate,
}

impl ChunkFailureReaction {
    /// Returns the label value recorded on the trace failure counter for this reaction.
    fn as_metric_label(self) -> &'static str {
        let label = match self {
            ChunkFailureReaction::Propagate => "propagate_failure",
            ChunkFailureReaction::DiscardChunk => "discard_chunk",
            ChunkFailureReaction::RetryPerSpan => "retry_per_span",
        };
        label
    }
}
78
/// Read-only settings shared by every chunk insert performed for one trace ingest call.
struct TraceChunkIngestContext<'a> {
    /// Handler forwarded to the span-to-insert-request conversion.
    pipeline_handler: PipelineHandlerRef,
    /// Pipeline selection used when converting spans and auxiliary data into insert requests.
    pipeline: &'a PipelineWay,
    /// Pipeline parameters forwarded to the span conversion.
    pipeline_params: &'a GreptimePipelineParams,
    /// Target table name handed to the conversion functions.
    table_name: &'a str,
    /// `true` when `pipeline` is `PipelineWay::OtlpTraceDirectV1`; selects the trace insert
    /// path (with column type reconciliation) instead of the log insert path.
    is_trace_v1_model: bool,
}
86
/// Mutable bookkeeping accumulated while the chunks of one trace ingest call are processed.
struct TraceIngestState {
    /// Data observed from successfully inserted spans; used afterwards to build the
    /// auxiliary table insert requests.
    aux_data: TraceAuxData,
    /// Running totals (accepted/rejected spans, write cost) returned to the caller.
    outcome: TraceIngestOutcome,
    /// Human-readable failure details, capped at `TRACE_FAILURE_MESSAGE_LIMIT` entries.
    failure_messages: Vec<String>,
}
92
93#[async_trait]
94impl OpenTelemetryProtocolHandler for Instance {
95 #[tracing::instrument(skip_all)]
96 async fn metrics(
97 &self,
98 request: ExportMetricsServiceRequest,
99 ctx: QueryContextRef,
100 ) -> ServerResult<Output> {
101 self.plugins
102 .get::<PermissionCheckerRef>()
103 .as_ref()
104 .check_permission(ctx.current_user(), PermissionReq::Otlp)
105 .context(AuthSnafu)?;
106
107 let interceptor_ref = self
108 .plugins
109 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
110 interceptor_ref.pre_execute(ctx.clone())?;
111
112 let input_names = request
113 .resource_metrics
114 .iter()
115 .flat_map(|r| r.scope_metrics.iter())
116 .flat_map(|s| s.metrics.iter().map(|m| &m.name))
117 .collect::<Vec<_>>();
118
119 let is_legacy = self.check_otlp_legacy(&input_names, ctx.clone()).await?;
121
122 let mut metric_ctx = ctx
123 .protocol_ctx()
124 .get_otlp_metric_ctx()
125 .cloned()
126 .unwrap_or_default();
127 metric_ctx.is_legacy = is_legacy;
128
129 let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request, &mut metric_ctx)?;
130 OTLP_METRICS_ROWS.inc_by(rows as u64);
131
132 let ctx = if !is_legacy {
133 let mut c = (*ctx).clone();
134 c.set_extension(OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM.to_string());
135 Arc::new(c)
136 } else {
137 ctx
138 };
139
140 if metric_ctx.is_legacy || !metric_ctx.with_metric_engine {
142 self.handle_row_inserts(requests, ctx, false, false)
143 .await
144 .map_err(BoxedError::new)
145 .context(error::ExecuteGrpcQuerySnafu)
146 } else {
147 let physical_table = ctx
148 .extension(PHYSICAL_TABLE_PARAM)
149 .unwrap_or(GREPTIME_PHYSICAL_TABLE)
150 .to_string();
151 self.handle_metric_row_inserts(requests, ctx, physical_table.clone())
152 .await
153 .map_err(BoxedError::new)
154 .context(error::ExecuteGrpcQuerySnafu)
155 }
156 }
157
158 #[tracing::instrument(skip_all)]
159 async fn traces(
160 &self,
161 pipeline_handler: PipelineHandlerRef,
162 request: ExportTraceServiceRequest,
163 pipeline: PipelineWay,
164 pipeline_params: GreptimePipelineParams,
165 table_name: String,
166 ctx: QueryContextRef,
167 ) -> ServerResult<TraceIngestOutcome> {
168 self.plugins
169 .get::<PermissionCheckerRef>()
170 .as_ref()
171 .check_permission(ctx.current_user(), PermissionReq::Otlp)
172 .context(AuthSnafu)?;
173
174 let interceptor_ref = self
175 .plugins
176 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
177 interceptor_ref.pre_execute(ctx.clone())?;
178
179 let spans = otlp::trace::span::parse(request);
180 self.ingest_trace_spans(
181 pipeline_handler,
182 &pipeline,
183 &pipeline_params,
184 table_name,
185 spans,
186 ctx,
187 )
188 .await
189 }
190
191 #[tracing::instrument(skip_all)]
192 async fn logs(
193 &self,
194 pipeline_handler: PipelineHandlerRef,
195 request: ExportLogsServiceRequest,
196 pipeline: PipelineWay,
197 pipeline_params: GreptimePipelineParams,
198 table_name: String,
199 ctx: QueryContextRef,
200 ) -> ServerResult<Vec<Output>> {
201 self.plugins
202 .get::<PermissionCheckerRef>()
203 .as_ref()
204 .check_permission(ctx.current_user(), PermissionReq::Otlp)
205 .context(AuthSnafu)?;
206
207 let interceptor_ref = self
208 .plugins
209 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
210 interceptor_ref.pre_execute(ctx.clone())?;
211
212 let opt_req = otlp::logs::to_grpc_insert_requests(
213 request,
214 pipeline,
215 pipeline_params,
216 table_name,
217 &ctx,
218 pipeline_handler,
219 )
220 .await?;
221
222 let mut outputs = vec![];
223
224 for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {
225 let cnt = requests
226 .inserts
227 .iter()
228 .filter_map(|r| r.rows.as_ref().map(|r| r.rows.len()))
229 .sum::<usize>();
230
231 let o = self
232 .handle_log_inserts(requests, temp_ctx)
233 .await
234 .inspect(|_| OTLP_LOGS_ROWS.inc_by(cnt as u64))
235 .map_err(BoxedError::new)
236 .context(error::ExecuteGrpcQuerySnafu)?;
237 outputs.push(o);
238 }
239
240 Ok(outputs)
241 }
242}
243
244impl Instance {
245 async fn ingest_trace_spans(
248 &self,
249 pipeline_handler: PipelineHandlerRef,
250 pipeline: &PipelineWay,
251 pipeline_params: &GreptimePipelineParams,
252 table_name: String,
253 groups: Vec<TraceSpanGroup>,
254 ctx: QueryContextRef,
255 ) -> ServerResult<TraceIngestOutcome> {
256 let is_trace_v1_model = matches!(pipeline, PipelineWay::OtlpTraceDirectV1);
257 let ingest_ctx = TraceChunkIngestContext {
258 pipeline_handler,
259 pipeline,
260 pipeline_params,
261 table_name: &table_name,
262 is_trace_v1_model,
263 };
264 let mut ingest_state = TraceIngestState {
265 aux_data: TraceAuxData::default(),
266 outcome: TraceIngestOutcome::default(),
267 failure_messages: Vec::new(),
268 };
269
270 for group in groups {
271 let chunks = group
272 .spans
273 .into_iter()
274 .chunks(TRACE_INGEST_CHUNK_SIZE)
275 .into_iter()
276 .map(|chunk| chunk.collect::<Vec<_>>())
277 .collect::<Vec<_>>();
278 for chunk in chunks {
279 self.ingest_trace_chunk(&ingest_ctx, chunk, ctx.clone(), &mut ingest_state)
280 .await?;
281 }
282 }
283
284 OTLP_TRACES_ROWS.inc_by(ingest_state.outcome.accepted_spans as u64);
285
286 if !ingest_state.aux_data.is_empty() {
287 let (aux_requests, _) = otlp::trace::to_grpc_insert_requests_for_aux_tables(
291 std::mem::take(&mut ingest_state.aux_data),
292 ingest_ctx.pipeline,
293 ingest_ctx.table_name,
294 )?;
295
296 if !aux_requests.inserts.is_empty() {
297 match self
298 .insert_trace_requests(aux_requests, ingest_ctx.is_trace_v1_model, ctx)
299 .await
300 {
301 Ok(output) => {
302 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
303 }
304 Err(err) => {
305 Self::push_trace_failure_message(
306 &mut ingest_state.failure_messages,
307 "aux_table_update_failed",
308 format!(
309 "Auxiliary trace tables were not fully updated ({})",
310 err.status_code().as_ref()
311 ),
312 );
313 }
314 }
315 }
316 }
317
318 ingest_state.outcome.error_message = Self::finish_trace_failure_message(
319 ingest_state.outcome.accepted_spans,
320 ingest_state.outcome.rejected_spans,
321 ingest_state.failure_messages,
322 );
323
324 Ok(ingest_state.outcome)
325 }
326
327 async fn ingest_trace_chunk(
330 &self,
331 ingest_ctx: &TraceChunkIngestContext<'_>,
332 chunk: Vec<TraceSpan>,
333 ctx: QueryContextRef,
334 ingest_state: &mut TraceIngestState,
335 ) -> ServerResult<()> {
336 let (requests, chunk_rows) = otlp::trace::to_grpc_insert_requests_from_spans(
339 &chunk,
340 ingest_ctx.pipeline,
341 ingest_ctx.pipeline_params,
342 ingest_ctx.table_name,
343 &ctx,
344 ingest_ctx.pipeline_handler.clone(),
345 )?;
346
347 match self
348 .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
349 .await
350 {
351 Ok(output) => {
352 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
353 ingest_state.outcome.accepted_spans += chunk_rows;
354 for span in &chunk {
355 ingest_state.aux_data.observe_span(span);
356 }
357 }
358 Err(err) => match Self::classify_trace_chunk_failure(err.status_code()) {
359 ChunkFailureReaction::RetryPerSpan => {
360 Self::push_trace_failure_message(
361 &mut ingest_state.failure_messages,
362 ChunkFailureReaction::RetryPerSpan.as_metric_label(),
363 format!("Chunk fallback triggered by {}", err.status_code().as_ref()),
364 );
365 self.ingest_trace_chunk_span_by_span(
371 ingest_ctx,
372 chunk,
373 ctx.clone(),
374 ingest_state,
375 )
376 .await?;
377 }
378 ChunkFailureReaction::DiscardChunk => {
379 ingest_state.outcome.rejected_spans += chunk.len();
380 Self::push_trace_failure_message(
381 &mut ingest_state.failure_messages,
382 ChunkFailureReaction::DiscardChunk.as_metric_label(),
383 format!(
384 "Discarded {} spans after ambiguous chunk failure ({})",
385 chunk.len(),
386 err.status_code().as_ref()
387 ),
388 );
389 }
392 ChunkFailureReaction::Propagate => {
396 Self::push_trace_failure_message(
397 &mut ingest_state.failure_messages,
398 ChunkFailureReaction::Propagate.as_metric_label(),
399 format!(
400 "Propagating retryable chunk failure ({})",
401 err.status_code().as_ref()
402 ),
403 );
404 return Err(err);
405 }
406 },
407 }
408
409 Ok(())
410 }
411
412 async fn ingest_trace_chunk_span_by_span(
414 &self,
415 ingest_ctx: &TraceChunkIngestContext<'_>,
416 chunk: Vec<TraceSpan>,
417 ctx: QueryContextRef,
418 ingest_state: &mut TraceIngestState,
419 ) -> ServerResult<()> {
420 for span in chunk {
421 let (requests, rows) = otlp::trace::to_grpc_insert_requests_from_spans(
422 std::slice::from_ref(&span),
423 ingest_ctx.pipeline,
424 ingest_ctx.pipeline_params,
425 ingest_ctx.table_name,
426 &ctx,
427 ingest_ctx.pipeline_handler.clone(),
428 )?;
429
430 match self
431 .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
432 .await
433 {
434 Ok(output) => {
435 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
436 ingest_state.outcome.accepted_spans += rows;
437 ingest_state.aux_data.observe_span(&span);
438 }
439 Err(err) => {
440 if Self::should_propagate_trace_span_failure(err.status_code()) {
441 Self::push_trace_failure_message(
442 &mut ingest_state.failure_messages,
443 ChunkFailureReaction::Propagate.as_metric_label(),
444 format!(
445 "Propagating retryable span failure for {}:{} ({})",
446 span.trace_id,
447 span.span_id,
448 err.status_code().as_ref()
449 ),
450 );
451 return Err(err);
452 }
453
454 ingest_state.outcome.rejected_spans += 1;
455 Self::push_trace_failure_message(
456 &mut ingest_state.failure_messages,
457 "span_rejected",
458 format!(
459 "Rejected span {}:{} ({})",
460 span.trace_id,
461 span.span_id,
462 err.status_code().as_ref()
463 ),
464 );
465 }
466 }
467 }
468
469 Ok(())
470 }
471
472 async fn insert_trace_requests(
474 &self,
475 mut requests: RowInsertRequests,
476 is_trace_v1_model: bool,
477 ctx: QueryContextRef,
478 ) -> ServerResult<Output> {
479 if is_trace_v1_model {
480 self.reconcile_trace_column_types(&mut requests, &ctx)
481 .await?;
482 self.handle_trace_inserts(requests, ctx)
483 .await
484 .map_err(BoxedError::new)
485 .context(error::ExecuteGrpcQuerySnafu)
486 } else {
487 self.handle_log_inserts(requests, ctx)
488 .await
489 .map_err(BoxedError::new)
490 .context(error::ExecuteGrpcQuerySnafu)
491 }
492 }
493
494 fn classify_trace_chunk_failure(status: StatusCode) -> ChunkFailureReaction {
495 match status {
496 StatusCode::InvalidArguments
497 | StatusCode::InvalidSyntax
498 | StatusCode::Unsupported
499 | StatusCode::TableNotFound
500 | StatusCode::TableColumnNotFound => ChunkFailureReaction::RetryPerSpan,
501 StatusCode::DatabaseNotFound => ChunkFailureReaction::DiscardChunk,
502 StatusCode::Cancelled | StatusCode::DeadlineExceeded => ChunkFailureReaction::Propagate,
503 _ if status.is_retryable() => ChunkFailureReaction::Propagate,
504 _ => ChunkFailureReaction::DiscardChunk,
505 }
506 }
507
508 fn should_propagate_trace_span_failure(status: StatusCode) -> bool {
509 matches!(
510 Self::classify_trace_chunk_failure(status),
511 ChunkFailureReaction::Propagate
512 )
513 }
514
515 fn add_trace_write_cost(outcome: &mut TraceIngestOutcome, cost: usize) {
516 outcome.write_cost += cost;
517 }
518
519 fn push_trace_failure_message(messages: &mut Vec<String>, label: &str, message: String) {
520 OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).inc();
521
522 if messages.len() < TRACE_FAILURE_MESSAGE_LIMIT {
523 messages.push(message);
524 } else if messages.len() == TRACE_FAILURE_MESSAGE_LIMIT {
525 tracing::debug!(
526 label,
527 limit = TRACE_FAILURE_MESSAGE_LIMIT,
528 "Trace ingest failure message limit reached; suppressing additional failure details"
529 );
530 }
531 }
532
/// Builds the summary message reported for a trace ingest call.
///
/// Returns `None` when no span was rejected and no failure message was recorded.
/// Otherwise returns the accepted/rejected counts, followed by the recorded messages
/// joined with `"; "` when there are any.
fn finish_trace_failure_message(
    accepted_spans: usize,
    rejected_spans: usize,
    messages: Vec<String>,
) -> Option<String> {
    let headline = || {
        format!(
            "Accepted {} spans, rejected {} spans",
            accepted_spans, rejected_spans
        )
    };

    match messages.as_slice() {
        [] if rejected_spans == 0 => None,
        [] => Some(headline()),
        details => {
            let mut summary = headline();
            summary.push_str(": ");
            summary.push_str(&details.join("; "));
            Some(summary)
        }
    }
}
554
555 async fn alter_trace_table_columns_to_float64(
557 &self,
558 ctx: &QueryContextRef,
559 table_name: &str,
560 column_names: &[String],
561 ) -> ServerResult<()> {
562 let catalog_name = ctx.current_catalog().to_string();
563 let schema_name = ctx.current_schema();
564 let alter_expr = AlterTableExpr {
565 catalog_name: catalog_name.clone(),
566 schema_name: schema_name.clone(),
567 table_name: table_name.to_string(),
568 kind: Some(Kind::ModifyColumnTypes(ModifyColumnTypes {
569 modify_column_types: column_names
570 .iter()
571 .map(|column_name| ModifyColumnType {
572 column_name: column_name.clone(),
573 target_type: ColumnDataType::Float64 as i32,
574 target_type_extension: None,
575 })
576 .collect(),
577 })),
578 };
579
580 if let Err(err) = self
581 .statement_executor
582 .alter_table_inner(alter_expr, ctx.clone())
583 .await
584 {
585 let table = self
586 .catalog_manager
587 .table(&catalog_name, &schema_name, table_name, None)
588 .await
589 .map_err(servers::error::Error::from)?;
590 let alter_already_applied = table
591 .map(|table| {
592 let table_schema = table.schema();
593 column_names.iter().all(|column_name| {
594 table_schema
595 .column_schema_by_name(column_name)
596 .and_then(|table_col| {
597 ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
598 .ok()
599 .map(|wrapper| wrapper.datatype())
600 })
601 == Some(ColumnDataType::Float64)
602 })
603 })
604 .unwrap_or(false);
605
606 if alter_already_applied {
607 return Ok(());
608 }
609
610 warn!(
611 table_name,
612 columns = ?column_names,
613 error = %err,
614 "failed to widen trace columns before insert"
615 );
616
617 return Err(wrap_trace_alter_failure(err));
618 }
619
620 Ok(())
621 }
622
/// Aligns the column types of trace insert requests with the types already present in the
/// target tables, rewriting request values and altering table columns where needed.
///
/// For every insert request this:
///
/// 1. looks up the target table (it may not exist yet);
/// 2. for each request column, collects the declared type plus the types of all values
///    in that column, and asks `choose_trace_reconcile_decision` for a target type;
/// 3. validates the planned rewrites with `validate_trace_column_rewrites`;
/// 4. alters the table columns the decisions marked as requiring an alter (via
///    `alter_trace_table_columns_to_float64`);
/// 5. rewrites the request schema and coerces every affected value to the target type.
///
/// # Errors
///
/// Returns catalog lookup errors, the enriched reconcile error when no decision can be
/// made for a column, validation and alter errors, and an `InvalidParameter` error when a
/// value cannot be coerced.
async fn reconcile_trace_column_types(
    &self,
    requests: &mut RowInsertRequests,
    ctx: &QueryContextRef,
) -> ServerResult<()> {
    let catalog = ctx.current_catalog();
    let schema = ctx.current_schema();

    for req in &mut requests.inserts {
        let table = self
            .catalog_manager
            .table(catalog, &schema, &req.table_name, None)
            .await?;

        // Requests without rows have nothing to reconcile.
        let Some(rows) = req.rows.as_mut() else {
            continue;
        };

        // `None` when the table does not exist; every column then has no existing type.
        let table_schema = table.map(|table| table.schema());
        let mut pending_rewrites = Vec::new();
        let mut pending_alter_columns = Vec::new();

        for (col_idx, col_schema) in rows.schema.iter().enumerate() {
            // Columns whose declared datatype is not a known `ColumnDataType` are skipped.
            let Some(current_type) = ColumnDataType::try_from(col_schema.datatype).ok() else {
                continue;
            };

            // Start from the declared type, then add the type of every non-null value
            // found in this column.
            let mut observed_types = Vec::new();
            push_observed_trace_type(&mut observed_types, current_type);

            for row in &rows.rows {
                let Some(value) = row
                    .values
                    .get(col_idx)
                    .and_then(|value| value.value_data.as_ref())
                else {
                    continue;
                };

                let Some(value_type) = trace_value_datatype(value) else {
                    continue;
                };
                push_observed_trace_type(&mut observed_types, value_type);
            }

            // Type of the same-named column in the existing table, if any.
            let existing_type = table_schema
                .as_ref()
                .and_then(|schema| schema.column_schema_by_name(&col_schema.column_name))
                .and_then(|table_col| {
                    ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
                        .ok()
                        .map(|wrapper| wrapper.datatype())
                });

            // Skip the column when neither the observed types nor the existing table type
            // is a reconcile candidate.
            if !observed_types
                .iter()
                .copied()
                .any(is_trace_reconcile_candidate_type)
                && existing_type
                    .map(|datatype| !is_trace_reconcile_candidate_type(datatype))
                    .unwrap_or(true)
            {
                continue;
            }

            // The original decision error is replaced by one that names the table, the
            // column and the types involved.
            let Some(decision) =
                choose_trace_reconcile_decision(&observed_types, existing_type).map_err(
                    |_| {
                        enrich_trace_reconcile_error(
                            &req.table_name,
                            &col_schema.column_name,
                            &observed_types,
                            existing_type,
                        )
                    },
                )?
            else {
                continue;
            };
            let target_type = decision.target_type();

            // Nothing to do when no alter is needed and both the values and the declared
            // request type already match the target type.
            if !decision.requires_alter()
                && observed_types
                    .iter()
                    .all(|observed| *observed == target_type)
                && col_schema.datatype == target_type as i32
            {
                continue;
            }

            if decision.requires_alter()
                && !pending_alter_columns.contains(&col_schema.column_name)
            {
                pending_alter_columns.push(col_schema.column_name.clone());
            }

            pending_rewrites.push(PendingTraceColumnRewrite {
                col_idx,
                target_type,
                column_name: col_schema.column_name.clone(),
            });
        }

        if pending_rewrites.is_empty() {
            continue;
        }

        // Validation runs before the table is altered and before any value is rewritten.
        validate_trace_column_rewrites(&rows.rows, &pending_rewrites, &req.table_name)?;

        if !pending_alter_columns.is_empty() {
            self.alter_trace_table_columns_to_float64(
                ctx,
                &req.table_name,
                &pending_alter_columns,
            )
            .await?;
        }

        // Update the declared column types in the request schema.
        for pending_rewrite in &pending_rewrites {
            rows.schema[pending_rewrite.col_idx].datatype = pending_rewrite.target_type as i32;
        }

        // Coerce every value whose type differs from its column's target type; missing
        // cells and values without a recognised type are left as they are.
        for row in &mut rows.rows {
            for pending_rewrite in &pending_rewrites {
                let Some(value) = row.values.get_mut(pending_rewrite.col_idx) else {
                    continue;
                };
                let Some(request_type) =
                    value.value_data.as_ref().and_then(trace_value_datatype)
                else {
                    continue;
                };
                if request_type == pending_rewrite.target_type {
                    continue;
                }

                value.value_data = coerce_value_data(
                    &value.value_data,
                    pending_rewrite.target_type,
                    request_type,
                )
                .map_err(|_| {
                    error::InvalidParameterSnafu {
                        reason: format!(
                            "failed to coerce trace column '{}' in table '{}' from {:?} to {:?}",
                            pending_rewrite.column_name,
                            req.table_name,
                            request_type,
                            pending_rewrite.target_type
                        ),
                    }
                    .build()
                })?;
            }
        }
    }

    Ok(())
}
791}
792
793fn wrap_trace_alter_failure<E>(err: E) -> servers::error::Error
795where
796 E: ErrorExt + Send + Sync + 'static,
797{
798 error::ExecuteGrpcQuerySnafu.into_error(BoxedError::new(err))
799}
800
#[cfg(test)]
mod tests {
    use common_error::ext::ErrorExt;
    use common_error::status_code::StatusCode;
    use servers::query_handler::TraceIngestOutcome;

    use super::{
        ChunkFailureReaction, Instance, TRACE_FAILURE_MESSAGE_LIMIT, wrap_trace_alter_failure,
    };
    use crate::metrics::OTLP_TRACES_FAILURE_COUNT;

    /// Every explicitly handled or retryable status maps to the expected reaction.
    #[test]
    fn test_classify_trace_chunk_failure() {
        let expectations = [
            (
                StatusCode::InvalidArguments,
                ChunkFailureReaction::RetryPerSpan,
            ),
            (StatusCode::InvalidSyntax, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::Unsupported, ChunkFailureReaction::RetryPerSpan),
            (
                StatusCode::TableColumnNotFound,
                ChunkFailureReaction::RetryPerSpan,
            ),
            (StatusCode::TableNotFound, ChunkFailureReaction::RetryPerSpan),
            (
                StatusCode::DatabaseNotFound,
                ChunkFailureReaction::DiscardChunk,
            ),
            (StatusCode::DeadlineExceeded, ChunkFailureReaction::Propagate),
            (StatusCode::Cancelled, ChunkFailureReaction::Propagate),
            (
                StatusCode::StorageUnavailable,
                ChunkFailureReaction::Propagate,
            ),
            (StatusCode::Internal, ChunkFailureReaction::Propagate),
            (StatusCode::RegionNotReady, ChunkFailureReaction::Propagate),
            (StatusCode::TableUnavailable, ChunkFailureReaction::Propagate),
            (StatusCode::RegionBusy, ChunkFailureReaction::Propagate),
            (
                StatusCode::RuntimeResourcesExhausted,
                ChunkFailureReaction::Propagate,
            ),
        ];

        for (status, expected) in expectations {
            assert_eq!(
                Instance::classify_trace_chunk_failure(status),
                expected,
                "unexpected reaction for {status:?}"
            );
        }
    }

    /// Span-level propagation follows the chunk-level classification.
    #[test]
    fn test_classify_trace_span_failure() {
        assert!(Instance::should_propagate_trace_span_failure(
            StatusCode::DeadlineExceeded
        ));
        assert!(Instance::should_propagate_trace_span_failure(
            StatusCode::StorageUnavailable
        ));
        assert!(!Instance::should_propagate_trace_span_failure(
            StatusCode::InvalidArguments
        ));
    }

    /// Write costs accumulate across calls.
    #[test]
    fn test_add_trace_write_cost() {
        let mut outcome = TraceIngestOutcome::default();
        Instance::add_trace_write_cost(&mut outcome, 3);
        Instance::add_trace_write_cost(&mut outcome, 5);
        assert_eq!(outcome.write_cost, 8);
    }

    /// The summary carries the counts and the detail messages; a clean run yields `None`.
    #[test]
    fn test_finish_trace_failure_message() {
        let message = Instance::finish_trace_failure_message(
            3,
            2,
            vec!["Rejected span trace:span (InvalidArguments)".to_string()],
        )
        .unwrap();
        assert!(message.contains("Accepted 3 spans, rejected 2 spans"));
        assert!(message.contains("Rejected span trace:span"));

        assert_eq!(Instance::finish_trace_failure_message(2, 0, vec![]), None);
    }

    /// Rejections without detail messages still produce the counts-only summary.
    #[test]
    fn test_finish_trace_failure_message_without_detail_messages() {
        assert_eq!(
            Instance::finish_trace_failure_message(0, 2, vec![]),
            Some("Accepted 0 spans, rejected 2 spans".to_string())
        );
    }

    /// Recording a failure bumps the counter for its label by exactly one.
    #[test]
    fn test_push_trace_failure_message_increments_labeled_counter() {
        // A label used by no other test keeps the counter delta deterministic.
        let label = "retry_per_span_counter_test";
        let initial = OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).get();
        let mut messages = Vec::new();

        Instance::push_trace_failure_message(
            &mut messages,
            label,
            "Chunk fallback triggered by InvalidArguments".to_string(),
        );

        assert_eq!(messages.len(), 1);
        assert_eq!(
            OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).get(),
            initial + 1
        );
    }

    /// Only the first `TRACE_FAILURE_MESSAGE_LIMIT` messages are retained.
    #[test]
    fn test_push_trace_failure_message_caps_recorded_messages() {
        let label = "retry_per_span_limit_test";
        let mut messages = Vec::new();

        // Push one message more than the limit; the bounds come from the constant so the
        // test keeps tracking the limit if it changes.
        for idx in 0..=TRACE_FAILURE_MESSAGE_LIMIT {
            Instance::push_trace_failure_message(&mut messages, label, format!("failure-{idx}"));
        }

        let expected = (0..TRACE_FAILURE_MESSAGE_LIMIT)
            .map(|idx| format!("failure-{idx}"))
            .collect::<Vec<_>>();
        assert_eq!(messages.len(), TRACE_FAILURE_MESSAGE_LIMIT);
        assert_eq!(messages, expected);
    }

    /// A status that is neither listed explicitly nor retryable discards the chunk.
    #[test]
    fn test_classify_trace_chunk_failure_defaults_to_discard() {
        assert_eq!(
            Instance::classify_trace_chunk_failure(StatusCode::Unknown),
            ChunkFailureReaction::DiscardChunk
        );
    }

    /// Wrapping an alter failure keeps the status code of the underlying error.
    #[test]
    fn test_wrap_trace_alter_failure_preserves_status_code() {
        let err = wrap_trace_alter_failure(
            servers::error::TableNotFoundSnafu {
                catalog: "greptime".to_string(),
                schema: "public".to_string(),
                table: "trace_type_missing".to_string(),
            }
            .build(),
        );

        assert_eq!(err.status_code(), StatusCode::TableNotFound);
    }
}