1use std::sync::Arc;
16
17use api::helper::ColumnDataTypeWrapper;
18use api::v1::alter_table_expr::Kind;
19use api::v1::{
20 AlterTableExpr, ColumnDataType, ModifyColumnType, ModifyColumnTypes, RowInsertRequests,
21};
22use async_trait::async_trait;
23use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
24use client::Output;
25use common_error::ext::{BoxedError, ErrorExt};
26use common_error::status_code::StatusCode;
27use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
28use common_telemetry::{tracing, warn};
29use itertools::Itertools;
30use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
31use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
32use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
33use pipeline::{GreptimePipelineParams, PipelineWay};
34use servers::error::{self, AuthSnafu, Result as ServerResult};
35use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
36use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
37use servers::otlp;
38use servers::otlp::trace::TraceAuxData;
39use servers::otlp::trace::coerce::{coerce_value_data, trace_value_datatype};
40use servers::otlp::trace::span::{TraceSpan, TraceSpanGroup};
41use servers::query_handler::{
42 OpenTelemetryProtocolHandler, PipelineHandlerRef, TraceIngestOutcome,
43};
44use session::context::QueryContextRef;
45use snafu::{IntoError, ResultExt};
46use table::requests::{
47 OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM, SEMANTIC_PIPELINE, SEMANTIC_SIGNAL_TYPE,
48 SEMANTIC_SOURCE, SEMANTIC_TRACE_CONVENTIONS, SEMANTIC_TRACE_HAS_EVENTS,
49 SEMANTIC_TRACE_HAS_LINKS, SEMANTIC_VALUE_UNKNOWN, SIGNAL_TYPE_LOG, SIGNAL_TYPE_METRIC,
50 SIGNAL_TYPE_TRACE, SOURCE_OPENTELEMETRY, TABLE_DATA_MODEL_TRACE_V1,
51};
52
53use crate::instance::Instance;
54use crate::instance::otlp::trace_semconv::trace_semconv_fixed_type;
55use crate::instance::otlp::trace_types::{
56 PendingTraceColumnRewrite, choose_trace_reconcile_decision, enrich_trace_reconcile_error,
57 is_trace_reconcile_candidate_type, push_observed_trace_type, validate_trace_column_rewrites,
58};
59use crate::metrics::{
60 OTLP_LOGS_ROWS, OTLP_METRICS_ROWS, OTLP_TRACES_FAILURE_COUNT, OTLP_TRACES_ROWS,
61};
62
63pub mod trace_semconv;
64pub mod trace_types;
65
/// Maximum number of spans of one span group that are converted and inserted together
/// as a single chunk.
const TRACE_INGEST_CHUNK_SIZE: usize = 64;
/// Maximum number of failure detail messages kept per ingest call. Failures beyond this
/// limit are still counted on the failure metric, but their message text is dropped.
const TRACE_FAILURE_MESSAGE_LIMIT: usize = 4;
68
/// How the trace ingest path reacts when inserting a chunk of spans fails.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChunkFailureReaction {
    /// Re-insert the spans of the failed chunk one at a time.
    RetryPerSpan,
    /// Give up on the whole chunk and count its spans as rejected.
    DiscardChunk,
    /// Hand the error back to the caller.
    Propagate,
}

impl ChunkFailureReaction {
    /// Returns the label value under which this reaction is reported on the
    /// trace failure counter.
    fn as_metric_label(self) -> &'static str {
        use ChunkFailureReaction::{DiscardChunk, Propagate, RetryPerSpan};

        let label = match self {
            Propagate => "propagate_failure",
            DiscardChunk => "discard_chunk",
            RetryPerSpan => "retry_per_span",
        };
        label
    }
}
85
/// Read-only settings shared by every chunk insert of a single trace ingest call.
struct TraceChunkIngestContext<'a> {
    /// Handler forwarded to the span-to-insert-request conversion.
    pipeline_handler: PipelineHandlerRef,
    /// Pipeline selection of the request.
    pipeline: &'a PipelineWay,
    /// Pipeline parameters forwarded to the span-to-insert-request conversion.
    pipeline_params: &'a GreptimePipelineParams,
    /// Name of the table the spans are written to.
    table_name: &'a str,
    /// `true` when `pipeline` is `PipelineWay::OtlpTraceDirectV1`.
    is_trace_v1_model: bool,
}
93
/// Mutable accumulator threaded through all chunk inserts of a single trace ingest call.
struct TraceIngestState {
    /// Observations of successfully inserted spans; converted into auxiliary-table
    /// inserts once all chunks are processed.
    aux_data: TraceAuxData,
    /// Running accepted/rejected span counters and accumulated write cost.
    outcome: TraceIngestOutcome,
    /// Failure details (capped at `TRACE_FAILURE_MESSAGE_LIMIT`) that end up in the
    /// outcome's error message.
    failure_messages: Vec<String>,
}
99
100#[async_trait]
101impl OpenTelemetryProtocolHandler for Instance {
102 #[tracing::instrument(skip_all)]
103 async fn metrics(
104 &self,
105 request: ExportMetricsServiceRequest,
106 ctx: QueryContextRef,
107 ) -> ServerResult<Output> {
108 self.plugins
109 .get::<PermissionCheckerRef>()
110 .as_ref()
111 .check_permission(ctx.current_user(), PermissionReq::Otlp)
112 .context(AuthSnafu)?;
113
114 let interceptor_ref = self
115 .plugins
116 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
117 interceptor_ref.pre_execute(ctx.clone())?;
118
119 let input_names = request
120 .resource_metrics
121 .iter()
122 .flat_map(|r| r.scope_metrics.iter())
123 .flat_map(|s| s.metrics.iter().map(|m| &m.name))
124 .collect::<Vec<_>>();
125
126 let is_legacy = self.check_otlp_legacy(&input_names, ctx.clone()).await?;
128
129 let mut metric_ctx = ctx
130 .protocol_ctx()
131 .get_otlp_metric_ctx()
132 .cloned()
133 .unwrap_or_default();
134 metric_ctx.is_legacy = is_legacy;
135
136 let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request, &mut metric_ctx)?;
137 OTLP_METRICS_ROWS.inc_by(rows as u64);
138
139 let ctx = {
140 let mut c = (*ctx).clone();
141 c.set_extension(SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_METRIC);
142 c.set_extension(SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY);
143 if !is_legacy {
144 c.set_extension(OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM.to_string());
145 }
146 Arc::new(c)
147 };
148
149 if metric_ctx.is_legacy || !metric_ctx.with_metric_engine {
151 self.handle_row_inserts(requests, ctx, false, false)
152 .await
153 .map_err(BoxedError::new)
154 .context(error::ExecuteGrpcQuerySnafu)
155 } else {
156 let physical_table = ctx
157 .extension(PHYSICAL_TABLE_PARAM)
158 .unwrap_or(GREPTIME_PHYSICAL_TABLE)
159 .to_string();
160 self.handle_metric_row_inserts(requests, ctx, physical_table.clone())
161 .await
162 .map_err(BoxedError::new)
163 .context(error::ExecuteGrpcQuerySnafu)
164 }
165 }
166
167 #[tracing::instrument(skip_all)]
168 async fn traces(
169 &self,
170 pipeline_handler: PipelineHandlerRef,
171 request: ExportTraceServiceRequest,
172 pipeline: PipelineWay,
173 pipeline_params: GreptimePipelineParams,
174 table_name: String,
175 ctx: QueryContextRef,
176 ) -> ServerResult<TraceIngestOutcome> {
177 self.plugins
178 .get::<PermissionCheckerRef>()
179 .as_ref()
180 .check_permission(ctx.current_user(), PermissionReq::Otlp)
181 .context(AuthSnafu)?;
182
183 let interceptor_ref = self
184 .plugins
185 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
186 interceptor_ref.pre_execute(ctx.clone())?;
187
188 let spans = otlp::trace::span::parse(request);
189 self.ingest_trace_spans(
190 pipeline_handler,
191 &pipeline,
192 &pipeline_params,
193 table_name,
194 spans,
195 ctx,
196 )
197 .await
198 }
199
200 #[tracing::instrument(skip_all)]
201 async fn logs(
202 &self,
203 pipeline_handler: PipelineHandlerRef,
204 request: ExportLogsServiceRequest,
205 pipeline: PipelineWay,
206 pipeline_params: GreptimePipelineParams,
207 table_name: String,
208 ctx: QueryContextRef,
209 ) -> ServerResult<Vec<Output>> {
210 self.plugins
211 .get::<PermissionCheckerRef>()
212 .as_ref()
213 .check_permission(ctx.current_user(), PermissionReq::Otlp)
214 .context(AuthSnafu)?;
215
216 let interceptor_ref = self
217 .plugins
218 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
219 interceptor_ref.pre_execute(ctx.clone())?;
220
221 let ctx = {
224 let mut c = (*ctx).clone();
225 c.set_extension(SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_LOG);
226 c.set_extension(SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY);
227 Arc::new(c)
228 };
229
230 let opt_req = otlp::logs::to_grpc_insert_requests(
231 request,
232 pipeline,
233 pipeline_params,
234 table_name,
235 &ctx,
236 pipeline_handler,
237 )
238 .await?;
239
240 let mut outputs = vec![];
241
242 for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {
243 let cnt = requests
244 .inserts
245 .iter()
246 .filter_map(|r| r.rows.as_ref().map(|r| r.rows.len()))
247 .sum::<usize>();
248
249 let o = self
250 .handle_log_inserts(requests, temp_ctx)
251 .await
252 .inspect(|_| OTLP_LOGS_ROWS.inc_by(cnt as u64))
253 .map_err(BoxedError::new)
254 .context(error::ExecuteGrpcQuerySnafu)?;
255 outputs.push(o);
256 }
257
258 Ok(outputs)
259 }
260}
261
262impl Instance {
263 async fn ingest_trace_spans(
266 &self,
267 pipeline_handler: PipelineHandlerRef,
268 pipeline: &PipelineWay,
269 pipeline_params: &GreptimePipelineParams,
270 table_name: String,
271 groups: Vec<TraceSpanGroup>,
272 ctx: QueryContextRef,
273 ) -> ServerResult<TraceIngestOutcome> {
274 let is_trace_v1_model = matches!(pipeline, PipelineWay::OtlpTraceDirectV1);
275
276 let main_ctx = {
279 let mut c = (*ctx).clone();
280 c.set_extension(SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_TRACE);
281 c.set_extension(SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY);
282 if is_trace_v1_model {
283 c.set_extension(SEMANTIC_PIPELINE, TABLE_DATA_MODEL_TRACE_V1);
284 c.set_extension(SEMANTIC_TRACE_HAS_EVENTS, "true");
285 c.set_extension(SEMANTIC_TRACE_HAS_LINKS, "true");
286 c.set_extension(SEMANTIC_TRACE_CONVENTIONS, SEMANTIC_VALUE_UNKNOWN);
288 }
289 Arc::new(c)
290 };
291
292 let ingest_ctx = TraceChunkIngestContext {
293 pipeline_handler,
294 pipeline,
295 pipeline_params,
296 table_name: &table_name,
297 is_trace_v1_model,
298 };
299 let mut ingest_state = TraceIngestState {
300 aux_data: TraceAuxData::default(),
301 outcome: TraceIngestOutcome::default(),
302 failure_messages: Vec::new(),
303 };
304
305 for group in groups {
306 let chunks = group
307 .spans
308 .into_iter()
309 .chunks(TRACE_INGEST_CHUNK_SIZE)
310 .into_iter()
311 .map(|chunk| chunk.collect::<Vec<_>>())
312 .collect::<Vec<_>>();
313 for chunk in chunks {
314 self.ingest_trace_chunk(&ingest_ctx, chunk, main_ctx.clone(), &mut ingest_state)
315 .await?;
316 }
317 }
318
319 OTLP_TRACES_ROWS.inc_by(ingest_state.outcome.accepted_spans as u64);
320
321 if !ingest_state.aux_data.is_empty() {
322 let (aux_requests, _) = otlp::trace::to_grpc_insert_requests_for_aux_tables(
326 std::mem::take(&mut ingest_state.aux_data),
327 ingest_ctx.pipeline,
328 ingest_ctx.table_name,
329 )?;
330
331 if !aux_requests.inserts.is_empty() {
332 match self
333 .insert_trace_requests(aux_requests, ingest_ctx.is_trace_v1_model, ctx)
334 .await
335 {
336 Ok(output) => {
337 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
338 }
339 Err(err) => {
340 Self::push_trace_failure_message(
341 &mut ingest_state.failure_messages,
342 "aux_table_update_failed",
343 format!(
344 "Auxiliary trace tables were not fully updated ({})",
345 err.status_code().as_ref()
346 ),
347 );
348 }
349 }
350 }
351 }
352
353 ingest_state.outcome.error_message = Self::finish_trace_failure_message(
354 ingest_state.outcome.accepted_spans,
355 ingest_state.outcome.rejected_spans,
356 ingest_state.failure_messages,
357 );
358
359 Ok(ingest_state.outcome)
360 }
361
362 async fn ingest_trace_chunk(
365 &self,
366 ingest_ctx: &TraceChunkIngestContext<'_>,
367 chunk: Vec<TraceSpan>,
368 ctx: QueryContextRef,
369 ingest_state: &mut TraceIngestState,
370 ) -> ServerResult<()> {
371 let (requests, chunk_rows) = otlp::trace::to_grpc_insert_requests_from_spans(
374 &chunk,
375 ingest_ctx.pipeline,
376 ingest_ctx.pipeline_params,
377 ingest_ctx.table_name,
378 &ctx,
379 ingest_ctx.pipeline_handler.clone(),
380 )?;
381
382 match self
383 .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
384 .await
385 {
386 Ok(output) => {
387 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
388 ingest_state.outcome.accepted_spans += chunk_rows;
389 for span in &chunk {
390 ingest_state.aux_data.observe_span(span);
391 }
392 }
393 Err(err) => match Self::classify_trace_chunk_failure(err.status_code()) {
394 ChunkFailureReaction::RetryPerSpan => {
395 Self::push_trace_failure_message(
396 &mut ingest_state.failure_messages,
397 ChunkFailureReaction::RetryPerSpan.as_metric_label(),
398 format!("Chunk fallback triggered by {}", err.status_code().as_ref()),
399 );
400 self.ingest_trace_chunk_span_by_span(
406 ingest_ctx,
407 chunk,
408 ctx.clone(),
409 ingest_state,
410 )
411 .await?;
412 }
413 ChunkFailureReaction::DiscardChunk => {
414 ingest_state.outcome.rejected_spans += chunk.len();
415 Self::push_trace_failure_message(
416 &mut ingest_state.failure_messages,
417 ChunkFailureReaction::DiscardChunk.as_metric_label(),
418 format!(
419 "Discarded {} spans after ambiguous chunk failure ({})",
420 chunk.len(),
421 err.status_code().as_ref()
422 ),
423 );
424 }
427 ChunkFailureReaction::Propagate => {
431 Self::push_trace_failure_message(
432 &mut ingest_state.failure_messages,
433 ChunkFailureReaction::Propagate.as_metric_label(),
434 format!(
435 "Propagating retryable chunk failure ({})",
436 err.status_code().as_ref()
437 ),
438 );
439 return Err(err);
440 }
441 },
442 }
443
444 Ok(())
445 }
446
447 async fn ingest_trace_chunk_span_by_span(
449 &self,
450 ingest_ctx: &TraceChunkIngestContext<'_>,
451 chunk: Vec<TraceSpan>,
452 ctx: QueryContextRef,
453 ingest_state: &mut TraceIngestState,
454 ) -> ServerResult<()> {
455 for span in chunk {
456 let (requests, rows) = otlp::trace::to_grpc_insert_requests_from_spans(
457 std::slice::from_ref(&span),
458 ingest_ctx.pipeline,
459 ingest_ctx.pipeline_params,
460 ingest_ctx.table_name,
461 &ctx,
462 ingest_ctx.pipeline_handler.clone(),
463 )?;
464
465 match self
466 .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
467 .await
468 {
469 Ok(output) => {
470 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
471 ingest_state.outcome.accepted_spans += rows;
472 ingest_state.aux_data.observe_span(&span);
473 }
474 Err(err) => {
475 if Self::should_propagate_trace_span_failure(err.status_code()) {
476 Self::push_trace_failure_message(
477 &mut ingest_state.failure_messages,
478 ChunkFailureReaction::Propagate.as_metric_label(),
479 format!(
480 "Propagating retryable span failure for {}:{} ({})",
481 span.trace_id,
482 span.span_id,
483 err.status_code().as_ref()
484 ),
485 );
486 return Err(err);
487 }
488
489 ingest_state.outcome.rejected_spans += 1;
490 Self::push_trace_failure_message(
491 &mut ingest_state.failure_messages,
492 "span_rejected",
493 format!(
494 "Rejected span {}:{} ({})",
495 span.trace_id,
496 span.span_id,
497 err.status_code().as_ref()
498 ),
499 );
500 }
501 }
502 }
503
504 Ok(())
505 }
506
507 async fn insert_trace_requests(
509 &self,
510 mut requests: RowInsertRequests,
511 is_trace_v1_model: bool,
512 ctx: QueryContextRef,
513 ) -> ServerResult<Output> {
514 if is_trace_v1_model {
515 self.reconcile_trace_column_types(&mut requests, &ctx)
516 .await?;
517 self.handle_trace_inserts(requests, ctx)
518 .await
519 .map_err(BoxedError::new)
520 .context(error::ExecuteGrpcQuerySnafu)
521 } else {
522 self.handle_log_inserts(requests, ctx)
523 .await
524 .map_err(BoxedError::new)
525 .context(error::ExecuteGrpcQuerySnafu)
526 }
527 }
528
529 fn classify_trace_chunk_failure(status: StatusCode) -> ChunkFailureReaction {
530 match status {
531 StatusCode::InvalidArguments
532 | StatusCode::InvalidSyntax
533 | StatusCode::Unsupported
534 | StatusCode::TableNotFound
535 | StatusCode::TableColumnNotFound => ChunkFailureReaction::RetryPerSpan,
536 StatusCode::DatabaseNotFound => ChunkFailureReaction::DiscardChunk,
537 StatusCode::Cancelled | StatusCode::DeadlineExceeded => ChunkFailureReaction::Propagate,
538 _ if status.is_retryable() => ChunkFailureReaction::Propagate,
539 _ => ChunkFailureReaction::DiscardChunk,
540 }
541 }
542
543 fn should_propagate_trace_span_failure(status: StatusCode) -> bool {
544 matches!(
545 Self::classify_trace_chunk_failure(status),
546 ChunkFailureReaction::Propagate
547 )
548 }
549
550 fn add_trace_write_cost(outcome: &mut TraceIngestOutcome, cost: usize) {
551 outcome.write_cost += cost;
552 }
553
554 fn push_trace_failure_message(messages: &mut Vec<String>, label: &str, message: String) {
555 OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).inc();
556
557 if messages.len() < TRACE_FAILURE_MESSAGE_LIMIT {
558 messages.push(message);
559 } else if messages.len() == TRACE_FAILURE_MESSAGE_LIMIT {
560 tracing::debug!(
561 label,
562 limit = TRACE_FAILURE_MESSAGE_LIMIT,
563 "Trace ingest failure message limit reached; suppressing additional failure details"
564 );
565 }
566 }
567
568 fn finish_trace_failure_message(
569 accepted_spans: usize,
570 rejected_spans: usize,
571 messages: Vec<String>,
572 ) -> Option<String> {
573 if rejected_spans == 0 && messages.is_empty() {
574 return None;
575 }
576
577 let mut summary = format!(
578 "Accepted {} spans, rejected {} spans",
579 accepted_spans, rejected_spans
580 );
581
582 if !messages.is_empty() {
583 summary.push_str(": ");
584 summary.push_str(&messages.join("; "));
585 }
586
587 Some(summary)
588 }
589
590 async fn alter_trace_table_columns_to_float64(
592 &self,
593 ctx: &QueryContextRef,
594 table_name: &str,
595 column_names: &[String],
596 ) -> ServerResult<()> {
597 let catalog_name = ctx.current_catalog().to_string();
598 let schema_name = ctx.current_schema();
599 let alter_expr = AlterTableExpr {
600 catalog_name: catalog_name.clone(),
601 schema_name: schema_name.clone(),
602 table_name: table_name.to_string(),
603 kind: Some(Kind::ModifyColumnTypes(ModifyColumnTypes {
604 modify_column_types: column_names
605 .iter()
606 .map(|column_name| ModifyColumnType {
607 column_name: column_name.clone(),
608 target_type: ColumnDataType::Float64 as i32,
609 target_type_extension: None,
610 })
611 .collect(),
612 })),
613 };
614
615 if let Err(err) = self
616 .statement_executor
617 .alter_table_inner(alter_expr, ctx.clone())
618 .await
619 {
620 let table = self
621 .catalog_manager
622 .table(&catalog_name, &schema_name, table_name, None)
623 .await
624 .map_err(servers::error::Error::from)?;
625 let alter_already_applied = table
626 .map(|table| {
627 let table_schema = table.schema();
628 column_names.iter().all(|column_name| {
629 table_schema
630 .column_schema_by_name(column_name)
631 .and_then(|table_col| {
632 ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
633 .ok()
634 .map(|wrapper| wrapper.datatype())
635 })
636 == Some(ColumnDataType::Float64)
637 })
638 })
639 .unwrap_or(false);
640
641 if alter_already_applied {
642 return Ok(());
643 }
644
645 warn!(
646 table_name,
647 columns = ?column_names,
648 error = %err,
649 "failed to widen trace columns before insert"
650 );
651
652 return Err(wrap_trace_alter_failure(err));
653 }
654
655 Ok(())
656 }
657
    /// Reconciles the column types of every insert request with the target table
    /// before the rows are written.
    ///
    /// For each request column the function collects the types observed in the request
    /// (declared column type plus the types of the individual values), looks up the
    /// type of the same-named column in the existing table (if any) and the fixed type
    /// returned by `trace_semconv_fixed_type`, and asks
    /// `choose_trace_reconcile_decision` for a target type. Columns that need work are
    /// validated, the table columns that require an alter are altered through
    /// `alter_trace_table_columns_to_float64`, and finally the request schema and the
    /// mismatching values are rewritten to the target type.
    ///
    /// # Errors
    ///
    /// Returns catalog lookup errors, the enriched reconcile error when no decision can
    /// be made, validation and alter errors, and `InvalidParameter` when a value cannot
    /// be coerced to the target type.
    async fn reconcile_trace_column_types(
        &self,
        requests: &mut RowInsertRequests,
        ctx: &QueryContextRef,
    ) -> ServerResult<()> {
        let catalog = ctx.current_catalog();
        let schema = ctx.current_schema();

        for req in &mut requests.inserts {
            let table = self
                .catalog_manager
                .table(catalog, &schema, &req.table_name, None)
                .await?;

            // Requests without rows have nothing to reconcile.
            let Some(rows) = req.rows.as_mut() else {
                continue;
            };

            // `None` when the table does not exist yet.
            let table_schema = table.map(|table| table.schema());
            let mut pending_rewrites = Vec::new();
            let mut pending_alter_columns = Vec::new();

            for (col_idx, col_schema) in rows.schema.iter().enumerate() {
                // Skip columns whose declared datatype is not a known `ColumnDataType`.
                let Some(current_type) = ColumnDataType::try_from(col_schema.datatype).ok() else {
                    continue;
                };

                // Start with the declared column type, then add the type of every
                // value present in this column.
                let mut observed_types = Vec::new();
                push_observed_trace_type(&mut observed_types, current_type);

                for row in &rows.rows {
                    let Some(value) = row
                        .values
                        .get(col_idx)
                        .and_then(|value| value.value_data.as_ref())
                    else {
                        continue;
                    };

                    let Some(value_type) = trace_value_datatype(value) else {
                        continue;
                    };
                    push_observed_trace_type(&mut observed_types, value_type);
                }

                // Type of the same-named column in the existing table, if there is one.
                let existing_type = table_schema
                    .as_ref()
                    .and_then(|schema| schema.column_schema_by_name(&col_schema.column_name))
                    .and_then(|table_col| {
                        ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
                            .ok()
                            .map(|wrapper| wrapper.datatype())
                    });
                let fixed_type = trace_semconv_fixed_type(&col_schema.column_name);

                // Nothing to do when neither the request nor the table involves a
                // reconcile candidate type and the column has no fixed type.
                if !observed_types
                    .iter()
                    .copied()
                    .any(is_trace_reconcile_candidate_type)
                    && existing_type
                        .map(|datatype| !is_trace_reconcile_candidate_type(datatype))
                        .unwrap_or(true)
                    && fixed_type.is_none()
                {
                    continue;
                }

                // The original decision error is replaced by one that carries the table,
                // column and type details.
                let Some(decision) = choose_trace_reconcile_decision(
                    &col_schema.column_name,
                    &observed_types,
                    existing_type,
                )
                .map_err(|_| {
                    enrich_trace_reconcile_error(
                        &req.table_name,
                        &col_schema.column_name,
                        &observed_types,
                        existing_type,
                        fixed_type,
                    )
                })?
                else {
                    continue;
                };
                let target_type = decision.target_type();

                // The column already matches the target everywhere: no rewrite needed.
                if !decision.requires_alter()
                    && observed_types
                        .iter()
                        .all(|observed| *observed == target_type)
                    && col_schema.datatype == target_type as i32
                {
                    continue;
                }

                if decision.requires_alter()
                    && !pending_alter_columns.contains(&col_schema.column_name)
                {
                    pending_alter_columns.push(col_schema.column_name.clone());
                }

                pending_rewrites.push(PendingTraceColumnRewrite {
                    col_idx,
                    target_type,
                    column_name: col_schema.column_name.clone(),
                });
            }

            if pending_rewrites.is_empty() {
                continue;
            }

            // Validate before altering the table, so a request that fails validation
            // does not trigger a schema change.
            validate_trace_column_rewrites(&rows.rows, &pending_rewrites, &req.table_name)?;

            if !pending_alter_columns.is_empty() {
                self.alter_trace_table_columns_to_float64(
                    ctx,
                    &req.table_name,
                    &pending_alter_columns,
                )
                .await?;
            }

            // Update the declared column types of the request schema.
            for pending_rewrite in &pending_rewrites {
                rows.schema[pending_rewrite.col_idx].datatype = pending_rewrite.target_type as i32;
            }

            // Coerce every value whose type differs from its column's target type.
            for row in &mut rows.rows {
                for pending_rewrite in &pending_rewrites {
                    let Some(value) = row.values.get_mut(pending_rewrite.col_idx) else {
                        continue;
                    };
                    let Some(request_type) =
                        value.value_data.as_ref().and_then(trace_value_datatype)
                    else {
                        continue;
                    };
                    if request_type == pending_rewrite.target_type {
                        continue;
                    }

                    value.value_data = coerce_value_data(
                        &value.value_data,
                        pending_rewrite.target_type,
                        request_type,
                    )
                    .map_err(|_| {
                        error::InvalidParameterSnafu {
                            reason: format!(
                                "failed to coerce trace column '{}' in table '{}' from {:?} to {:?}",
                                pending_rewrite.column_name,
                                req.table_name,
                                request_type,
                                pending_rewrite.target_type
                            ),
                        }
                        .build()
                    })?;
                }
            }
        }

        Ok(())
    }
831}
832
833fn wrap_trace_alter_failure<E>(err: E) -> servers::error::Error
835where
836 E: ErrorExt + Send + Sync + 'static,
837{
838 error::ExecuteGrpcQuerySnafu.into_error(BoxedError::new(err))
839}
840
#[cfg(test)]
mod tests {
    use common_error::ext::ErrorExt;
    use common_error::status_code::StatusCode;
    use servers::query_handler::TraceIngestOutcome;

    use super::{ChunkFailureReaction, Instance, wrap_trace_alter_failure};
    use crate::metrics::OTLP_TRACES_FAILURE_COUNT;

    /// Every explicitly handled or retryable status code maps to the expected reaction.
    #[test]
    fn test_classify_trace_chunk_failure() {
        let expectations = [
            (StatusCode::InvalidArguments, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::InvalidSyntax, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::Unsupported, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::TableColumnNotFound, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::TableNotFound, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::DatabaseNotFound, ChunkFailureReaction::DiscardChunk),
            (StatusCode::DeadlineExceeded, ChunkFailureReaction::Propagate),
            (StatusCode::Cancelled, ChunkFailureReaction::Propagate),
            (StatusCode::StorageUnavailable, ChunkFailureReaction::Propagate),
            (StatusCode::Internal, ChunkFailureReaction::Propagate),
            (StatusCode::RegionNotReady, ChunkFailureReaction::Propagate),
            (StatusCode::TableUnavailable, ChunkFailureReaction::Propagate),
            (StatusCode::RegionBusy, ChunkFailureReaction::Propagate),
            (StatusCode::RuntimeResourcesExhausted, ChunkFailureReaction::Propagate),
        ];

        for (status, expected) in expectations {
            assert_eq!(Instance::classify_trace_chunk_failure(status), expected);
        }
    }

    /// Span-level propagation follows the chunk classification.
    #[test]
    fn test_classify_trace_span_failure() {
        let propagated = [StatusCode::DeadlineExceeded, StatusCode::StorageUnavailable];
        for status in propagated {
            assert!(Instance::should_propagate_trace_span_failure(status));
        }

        assert!(!Instance::should_propagate_trace_span_failure(
            StatusCode::InvalidArguments
        ));
    }

    /// Write costs accumulate across calls.
    #[test]
    fn test_add_trace_write_cost() {
        let mut outcome = TraceIngestOutcome::default();
        for cost in [3, 5] {
            Instance::add_trace_write_cost(&mut outcome, cost);
        }
        assert_eq!(outcome.write_cost, 8);
    }

    /// The summary carries the counters and the details; a clean run has no message.
    #[test]
    fn test_finish_trace_failure_message() {
        let details = vec!["Rejected span trace:span (InvalidArguments)".to_string()];
        let message = Instance::finish_trace_failure_message(3, 2, details).unwrap();
        assert!(message.contains("Accepted 3 spans, rejected 2 spans"));
        assert!(message.contains("Rejected span trace:span"));

        assert_eq!(Instance::finish_trace_failure_message(2, 0, vec![]), None);
    }

    /// Rejected spans alone are enough to produce a summary.
    #[test]
    fn test_finish_trace_failure_message_without_detail_messages() {
        let summary = Instance::finish_trace_failure_message(0, 2, vec![]);
        assert_eq!(
            summary,
            Some("Accepted 0 spans, rejected 2 spans".to_string())
        );
    }

    /// Recording a failure bumps the counter for its label by one.
    #[test]
    fn test_push_trace_failure_message_increments_labeled_counter() {
        let label = "retry_per_span_counter_test";
        let before = OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).get();
        let mut messages = Vec::new();

        Instance::push_trace_failure_message(
            &mut messages,
            label,
            "Chunk fallback triggered by InvalidArguments".to_string(),
        );

        let after = OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).get();
        assert_eq!(messages.len(), 1);
        assert_eq!(after, before + 1);
    }

    /// Only the first four messages are kept; the fifth is dropped.
    #[test]
    fn test_push_trace_failure_message_caps_recorded_messages() {
        let label = "retry_per_span_limit_test";
        let mut messages = Vec::new();

        for idx in 0..=4 {
            Instance::push_trace_failure_message(&mut messages, label, format!("failure-{idx}"));
        }

        let expected = (0..4)
            .map(|idx| format!("failure-{idx}"))
            .collect::<Vec<_>>();
        assert_eq!(messages.len(), 4);
        assert_eq!(messages, expected);
    }

    /// A code that is neither listed nor retryable falls back to discarding.
    #[test]
    fn test_classify_trace_chunk_failure_defaults_to_discard() {
        let reaction = Instance::classify_trace_chunk_failure(StatusCode::Unknown);
        assert_eq!(reaction, ChunkFailureReaction::DiscardChunk);
    }

    /// Wrapping keeps the status code of the wrapped error visible.
    #[test]
    fn test_wrap_trace_alter_failure_preserves_status_code() {
        let source = servers::error::TableNotFoundSnafu {
            catalog: "greptime".to_string(),
            schema: "public".to_string(),
            table: "trace_type_missing".to_string(),
        }
        .build();

        let err = wrap_trace_alter_failure(source);

        assert_eq!(err.status_code(), StatusCode::TableNotFound);
    }
}