1use std::sync::Arc;
16
17use api::helper::ColumnDataTypeWrapper;
18use api::v1::alter_table_expr::Kind;
19use api::v1::{
20 AlterTableExpr, ColumnDataType, ModifyColumnType, ModifyColumnTypes, RowInsertRequests,
21};
22use async_trait::async_trait;
23use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
24use client::Output;
25use common_error::ext::{BoxedError, ErrorExt};
26use common_error::status_code::StatusCode;
27use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
28use common_telemetry::{tracing, warn};
29use itertools::Itertools;
30use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
31use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
32use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
33use pipeline::{GreptimePipelineParams, PipelineWay};
34use servers::error::{self, AuthSnafu, Result as ServerResult};
35use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
36use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
37use servers::otlp;
38use servers::otlp::coerce::{coerce_value_data, trace_value_datatype};
39use servers::otlp::trace::TraceAuxData;
40use servers::otlp::trace::span::{TraceSpan, TraceSpanGroup};
41use servers::query_handler::{
42 OpenTelemetryProtocolHandler, PipelineHandlerRef, TraceIngestOutcome,
43};
44use session::context::QueryContextRef;
45use snafu::{IntoError, ResultExt};
46use table::requests::{
47 OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM, SEMANTIC_PER_TABLE_INDEX_KEY,
48 SEMANTIC_PIPELINE, SEMANTIC_SIGNAL_TYPE, SEMANTIC_SOURCE, SEMANTIC_TRACE_CONVENTIONS,
49 SEMANTIC_VALUE_MIXED, SEMANTIC_VALUE_UNKNOWN, SIGNAL_TYPE_LOG, SIGNAL_TYPE_METRIC,
50 SIGNAL_TYPE_TRACE, SOURCE_OPENTELEMETRY, TABLE_DATA_MODEL_TRACE_V1,
51};
52
53use crate::instance::Instance;
54use crate::instance::otlp::trace_semconv::trace_semconv_fixed_type;
55use crate::instance::otlp::trace_types::{
56 PendingTraceColumnRewrite, choose_trace_reconcile_decision, enrich_trace_reconcile_error,
57 is_trace_reconcile_candidate_type, push_observed_trace_type, validate_trace_column_rewrites,
58};
59use crate::metrics::{
60 OTLP_LOGS_ROWS, OTLP_METRICS_ROWS, OTLP_TRACES_FAILURE_COUNT, OTLP_TRACES_ROWS,
61};
62
63pub mod trace_semconv;
64pub mod trace_types;
65
/// Maximum number of spans converted and written in a single insert; the spans of each
/// `TraceSpanGroup` are split into chunks of at most this size.
const TRACE_INGEST_CHUNK_SIZE: usize = 64;
/// Maximum number of failure detail messages recorded per ingest call. Failures beyond
/// this limit are still counted in `OTLP_TRACES_FAILURE_COUNT` but their text is dropped.
const TRACE_FAILURE_MESSAGE_LIMIT: usize = 4;
68
/// How the trace ingest path responds when writing a chunk of spans fails.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChunkFailureReaction {
    /// Re-submit the spans of the failed chunk one at a time.
    RetryPerSpan,
    /// Count every span of the chunk as rejected and continue with the next chunk.
    DiscardChunk,
    /// Stop ingesting and hand the error back to the caller.
    Propagate,
}

impl ChunkFailureReaction {
    /// Returns the label under which this reaction is counted in the trace failure metric.
    fn as_metric_label(self) -> &'static str {
        match self {
            ChunkFailureReaction::Propagate => "propagate_failure",
            ChunkFailureReaction::DiscardChunk => "discard_chunk",
            ChunkFailureReaction::RetryPerSpan => "retry_per_span",
        }
    }
}
85
/// Read-only inputs shared by every chunk written during one trace ingest call.
struct TraceChunkIngestContext<'a> {
    /// Handler forwarded to the span-to-insert-request conversion.
    pipeline_handler: PipelineHandlerRef,
    /// Pipeline selection for the request; forwarded to both the span conversion and the
    /// auxiliary-table conversion.
    pipeline: &'a PipelineWay,
    /// Pipeline parameters forwarded to the span conversion.
    pipeline_params: &'a GreptimePipelineParams,
    /// Name of the trace table the spans are written to.
    table_name: &'a str,
    /// `true` when `pipeline` is `PipelineWay::OtlpTraceDirectV1`. Selects the trace insert
    /// path (including column type reconciliation) over the log insert path.
    is_trace_v1_model: bool,
}
93
/// Mutable bookkeeping accumulated while the chunks of one trace request are written.
struct TraceIngestState {
    /// Fed with every span whose write succeeded; converted into auxiliary-table inserts
    /// after all chunks have been processed.
    aux_data: TraceAuxData,
    /// Running accepted/rejected span counts and write cost; returned to the caller.
    outcome: TraceIngestOutcome,
    /// Failure details recorded through `push_trace_failure_message` (at most
    /// `TRACE_FAILURE_MESSAGE_LIMIT` entries), folded into the outcome's error message.
    failure_messages: Vec<String>,
}
99
100#[async_trait]
101impl OpenTelemetryProtocolHandler for Instance {
102 #[tracing::instrument(skip_all)]
103 async fn metrics(
104 &self,
105 request: ExportMetricsServiceRequest,
106 ctx: QueryContextRef,
107 ) -> ServerResult<Output> {
108 self.plugins
109 .get::<PermissionCheckerRef>()
110 .as_ref()
111 .check_permission(ctx.current_user(), PermissionReq::Otlp)
112 .context(AuthSnafu)?;
113
114 let interceptor_ref = self
115 .plugins
116 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
117 interceptor_ref.pre_execute(ctx.clone())?;
118
119 let input_names = request
120 .resource_metrics
121 .iter()
122 .flat_map(|r| r.scope_metrics.iter())
123 .flat_map(|s| s.metrics.iter().map(|m| &m.name))
124 .collect::<Vec<_>>();
125
126 let is_legacy = self.check_otlp_legacy(&input_names, ctx.clone()).await?;
128
129 let mut metric_ctx = ctx
130 .protocol_ctx()
131 .get_otlp_metric_ctx()
132 .cloned()
133 .unwrap_or_default();
134 metric_ctx.is_legacy = is_legacy;
135
136 let (requests, rows, semantic_index) =
137 otlp::metrics::to_grpc_insert_requests(request, &mut metric_ctx)?;
138 OTLP_METRICS_ROWS.inc_by(rows as u64);
139
140 let ctx = {
141 let mut c = (*ctx).clone();
142 c.set_extension(SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_METRIC);
143 c.set_extension(SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY);
144 if let Some(index) = semantic_index.encode() {
147 c.set_extension(SEMANTIC_PER_TABLE_INDEX_KEY, index);
148 }
149 if !is_legacy {
150 c.set_extension(OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM.to_string());
151 }
152 Arc::new(c)
153 };
154
155 if metric_ctx.is_legacy || !metric_ctx.with_metric_engine {
157 self.handle_row_inserts(requests, ctx, false, false)
158 .await
159 .map_err(BoxedError::new)
160 .context(error::ExecuteGrpcQuerySnafu)
161 } else {
162 let physical_table = ctx
163 .extension(PHYSICAL_TABLE_PARAM)
164 .unwrap_or(GREPTIME_PHYSICAL_TABLE)
165 .to_string();
166 self.handle_metric_row_inserts(requests, ctx, physical_table.clone())
167 .await
168 .map_err(BoxedError::new)
169 .context(error::ExecuteGrpcQuerySnafu)
170 }
171 }
172
173 #[tracing::instrument(skip_all)]
174 async fn traces(
175 &self,
176 pipeline_handler: PipelineHandlerRef,
177 request: ExportTraceServiceRequest,
178 pipeline: PipelineWay,
179 pipeline_params: GreptimePipelineParams,
180 table_name: String,
181 ctx: QueryContextRef,
182 ) -> ServerResult<TraceIngestOutcome> {
183 self.plugins
184 .get::<PermissionCheckerRef>()
185 .as_ref()
186 .check_permission(ctx.current_user(), PermissionReq::Otlp)
187 .context(AuthSnafu)?;
188
189 let interceptor_ref = self
190 .plugins
191 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
192 interceptor_ref.pre_execute(ctx.clone())?;
193
194 let conventions = trace_conventions(&request);
196 let spans = otlp::trace::span::parse(request);
197 self.ingest_trace_spans(
198 pipeline_handler,
199 &pipeline,
200 &pipeline_params,
201 table_name,
202 spans,
203 &conventions,
204 ctx,
205 )
206 .await
207 }
208
209 #[tracing::instrument(skip_all)]
210 async fn logs(
211 &self,
212 pipeline_handler: PipelineHandlerRef,
213 request: ExportLogsServiceRequest,
214 pipeline: PipelineWay,
215 pipeline_params: GreptimePipelineParams,
216 table_name: String,
217 ctx: QueryContextRef,
218 ) -> ServerResult<Vec<Output>> {
219 self.plugins
220 .get::<PermissionCheckerRef>()
221 .as_ref()
222 .check_permission(ctx.current_user(), PermissionReq::Otlp)
223 .context(AuthSnafu)?;
224
225 let interceptor_ref = self
226 .plugins
227 .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
228 interceptor_ref.pre_execute(ctx.clone())?;
229
230 let ctx = {
233 let mut c = (*ctx).clone();
234 c.set_extension(SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_LOG);
235 c.set_extension(SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY);
236 Arc::new(c)
237 };
238
239 let opt_req = otlp::logs::to_grpc_insert_requests(
240 request,
241 pipeline,
242 pipeline_params,
243 table_name,
244 &ctx,
245 pipeline_handler,
246 )
247 .await?;
248
249 let mut outputs = vec![];
250
251 for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {
252 let cnt = requests
253 .inserts
254 .iter()
255 .filter_map(|r| r.rows.as_ref().map(|r| r.rows.len()))
256 .sum::<usize>();
257
258 let o = self
259 .handle_log_inserts(requests, temp_ctx)
260 .await
261 .inspect(|_| OTLP_LOGS_ROWS.inc_by(cnt as u64))
262 .map_err(BoxedError::new)
263 .context(error::ExecuteGrpcQuerySnafu)?;
264 outputs.push(o);
265 }
266
267 Ok(outputs)
268 }
269}
270
271impl Instance {
272 #[allow(clippy::too_many_arguments)]
275 async fn ingest_trace_spans(
276 &self,
277 pipeline_handler: PipelineHandlerRef,
278 pipeline: &PipelineWay,
279 pipeline_params: &GreptimePipelineParams,
280 table_name: String,
281 groups: Vec<TraceSpanGroup>,
282 conventions: &str,
283 ctx: QueryContextRef,
284 ) -> ServerResult<TraceIngestOutcome> {
285 let is_trace_v1_model = matches!(pipeline, PipelineWay::OtlpTraceDirectV1);
286
287 let main_ctx = {
290 let mut c = (*ctx).clone();
291 c.set_extension(SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_TRACE);
292 c.set_extension(SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY);
293 if is_trace_v1_model {
294 c.set_extension(SEMANTIC_PIPELINE, TABLE_DATA_MODEL_TRACE_V1);
295 c.set_extension(SEMANTIC_TRACE_CONVENTIONS, conventions);
296 }
297 Arc::new(c)
298 };
299
300 let ingest_ctx = TraceChunkIngestContext {
301 pipeline_handler,
302 pipeline,
303 pipeline_params,
304 table_name: &table_name,
305 is_trace_v1_model,
306 };
307 let mut ingest_state = TraceIngestState {
308 aux_data: TraceAuxData::default(),
309 outcome: TraceIngestOutcome::default(),
310 failure_messages: Vec::new(),
311 };
312
313 for group in groups {
314 let chunks = group
315 .spans
316 .into_iter()
317 .chunks(TRACE_INGEST_CHUNK_SIZE)
318 .into_iter()
319 .map(|chunk| chunk.collect::<Vec<_>>())
320 .collect::<Vec<_>>();
321 for chunk in chunks {
322 self.ingest_trace_chunk(&ingest_ctx, chunk, main_ctx.clone(), &mut ingest_state)
323 .await?;
324 }
325 }
326
327 OTLP_TRACES_ROWS.inc_by(ingest_state.outcome.accepted_spans as u64);
328
329 if !ingest_state.aux_data.is_empty() {
330 let (aux_requests, _) = otlp::trace::to_grpc_insert_requests_for_aux_tables(
334 std::mem::take(&mut ingest_state.aux_data),
335 ingest_ctx.pipeline,
336 ingest_ctx.table_name,
337 )?;
338
339 if !aux_requests.inserts.is_empty() {
340 match self
341 .insert_trace_requests(aux_requests, ingest_ctx.is_trace_v1_model, ctx)
342 .await
343 {
344 Ok(output) => {
345 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
346 }
347 Err(err) => {
348 Self::push_trace_failure_message(
349 &mut ingest_state.failure_messages,
350 "aux_table_update_failed",
351 format!(
352 "Auxiliary trace tables were not fully updated ({})",
353 err.status_code().as_ref()
354 ),
355 );
356 }
357 }
358 }
359 }
360
361 ingest_state.outcome.error_message = Self::finish_trace_failure_message(
362 ingest_state.outcome.accepted_spans,
363 ingest_state.outcome.rejected_spans,
364 ingest_state.failure_messages,
365 );
366
367 Ok(ingest_state.outcome)
368 }
369
370 async fn ingest_trace_chunk(
373 &self,
374 ingest_ctx: &TraceChunkIngestContext<'_>,
375 chunk: Vec<TraceSpan>,
376 ctx: QueryContextRef,
377 ingest_state: &mut TraceIngestState,
378 ) -> ServerResult<()> {
379 let (requests, chunk_rows) = otlp::trace::to_grpc_insert_requests_from_spans(
382 &chunk,
383 ingest_ctx.pipeline,
384 ingest_ctx.pipeline_params,
385 ingest_ctx.table_name,
386 &ctx,
387 ingest_ctx.pipeline_handler.clone(),
388 )?;
389
390 match self
391 .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
392 .await
393 {
394 Ok(output) => {
395 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
396 ingest_state.outcome.accepted_spans += chunk_rows;
397 for span in &chunk {
398 ingest_state.aux_data.observe_span(span);
399 }
400 }
401 Err(err) => match Self::classify_trace_chunk_failure(err.status_code()) {
402 ChunkFailureReaction::RetryPerSpan => {
403 Self::push_trace_failure_message(
404 &mut ingest_state.failure_messages,
405 ChunkFailureReaction::RetryPerSpan.as_metric_label(),
406 format!("Chunk fallback triggered by {}", err.status_code().as_ref()),
407 );
408 self.ingest_trace_chunk_span_by_span(
414 ingest_ctx,
415 chunk,
416 ctx.clone(),
417 ingest_state,
418 )
419 .await?;
420 }
421 ChunkFailureReaction::DiscardChunk => {
422 ingest_state.outcome.rejected_spans += chunk.len();
423 Self::push_trace_failure_message(
424 &mut ingest_state.failure_messages,
425 ChunkFailureReaction::DiscardChunk.as_metric_label(),
426 format!(
427 "Discarded {} spans after ambiguous chunk failure ({})",
428 chunk.len(),
429 err.status_code().as_ref()
430 ),
431 );
432 }
435 ChunkFailureReaction::Propagate => {
439 Self::push_trace_failure_message(
440 &mut ingest_state.failure_messages,
441 ChunkFailureReaction::Propagate.as_metric_label(),
442 format!(
443 "Propagating retryable chunk failure ({})",
444 err.status_code().as_ref()
445 ),
446 );
447 return Err(err);
448 }
449 },
450 }
451
452 Ok(())
453 }
454
455 async fn ingest_trace_chunk_span_by_span(
457 &self,
458 ingest_ctx: &TraceChunkIngestContext<'_>,
459 chunk: Vec<TraceSpan>,
460 ctx: QueryContextRef,
461 ingest_state: &mut TraceIngestState,
462 ) -> ServerResult<()> {
463 for span in chunk {
464 let (requests, rows) = otlp::trace::to_grpc_insert_requests_from_spans(
465 std::slice::from_ref(&span),
466 ingest_ctx.pipeline,
467 ingest_ctx.pipeline_params,
468 ingest_ctx.table_name,
469 &ctx,
470 ingest_ctx.pipeline_handler.clone(),
471 )?;
472
473 match self
474 .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
475 .await
476 {
477 Ok(output) => {
478 Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
479 ingest_state.outcome.accepted_spans += rows;
480 ingest_state.aux_data.observe_span(&span);
481 }
482 Err(err) => {
483 if Self::should_propagate_trace_span_failure(err.status_code()) {
484 Self::push_trace_failure_message(
485 &mut ingest_state.failure_messages,
486 ChunkFailureReaction::Propagate.as_metric_label(),
487 format!(
488 "Propagating retryable span failure for {}:{} ({})",
489 span.trace_id,
490 span.span_id,
491 err.status_code().as_ref()
492 ),
493 );
494 return Err(err);
495 }
496
497 ingest_state.outcome.rejected_spans += 1;
498 Self::push_trace_failure_message(
499 &mut ingest_state.failure_messages,
500 "span_rejected",
501 format!(
502 "Rejected span {}:{} ({})",
503 span.trace_id,
504 span.span_id,
505 err.status_code().as_ref()
506 ),
507 );
508 }
509 }
510 }
511
512 Ok(())
513 }
514
515 async fn insert_trace_requests(
517 &self,
518 mut requests: RowInsertRequests,
519 is_trace_v1_model: bool,
520 ctx: QueryContextRef,
521 ) -> ServerResult<Output> {
522 if is_trace_v1_model {
523 self.reconcile_trace_column_types(&mut requests, &ctx)
524 .await?;
525 self.handle_trace_inserts(requests, ctx)
526 .await
527 .map_err(BoxedError::new)
528 .context(error::ExecuteGrpcQuerySnafu)
529 } else {
530 self.handle_log_inserts(requests, ctx)
531 .await
532 .map_err(BoxedError::new)
533 .context(error::ExecuteGrpcQuerySnafu)
534 }
535 }
536
537 fn classify_trace_chunk_failure(status: StatusCode) -> ChunkFailureReaction {
538 match status {
539 StatusCode::InvalidArguments
540 | StatusCode::InvalidSyntax
541 | StatusCode::Unsupported
542 | StatusCode::TableNotFound
543 | StatusCode::TableColumnNotFound => ChunkFailureReaction::RetryPerSpan,
544 StatusCode::DatabaseNotFound => ChunkFailureReaction::DiscardChunk,
545 StatusCode::Cancelled | StatusCode::DeadlineExceeded => ChunkFailureReaction::Propagate,
546 _ if status.is_retryable() => ChunkFailureReaction::Propagate,
547 _ => ChunkFailureReaction::DiscardChunk,
548 }
549 }
550
551 fn should_propagate_trace_span_failure(status: StatusCode) -> bool {
552 matches!(
553 Self::classify_trace_chunk_failure(status),
554 ChunkFailureReaction::Propagate
555 )
556 }
557
    /// Adds the `cost` reported by one successful insert to the outcome's running
    /// `write_cost` total.
    fn add_trace_write_cost(outcome: &mut TraceIngestOutcome, cost: usize) {
        outcome.write_cost += cost;
    }
561
562 fn push_trace_failure_message(messages: &mut Vec<String>, label: &str, message: String) {
563 OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).inc();
564
565 if messages.len() < TRACE_FAILURE_MESSAGE_LIMIT {
566 messages.push(message);
567 } else if messages.len() == TRACE_FAILURE_MESSAGE_LIMIT {
568 tracing::debug!(
569 label,
570 limit = TRACE_FAILURE_MESSAGE_LIMIT,
571 "Trace ingest failure message limit reached; suppressing additional failure details"
572 );
573 }
574 }
575
576 fn finish_trace_failure_message(
577 accepted_spans: usize,
578 rejected_spans: usize,
579 messages: Vec<String>,
580 ) -> Option<String> {
581 if rejected_spans == 0 && messages.is_empty() {
582 return None;
583 }
584
585 let mut summary = format!(
586 "Accepted {} spans, rejected {} spans",
587 accepted_spans, rejected_spans
588 );
589
590 if !messages.is_empty() {
591 summary.push_str(": ");
592 summary.push_str(&messages.join("; "));
593 }
594
595 Some(summary)
596 }
597
598 async fn alter_trace_table_columns_to_float64(
600 &self,
601 ctx: &QueryContextRef,
602 table_name: &str,
603 column_names: &[String],
604 ) -> ServerResult<()> {
605 let catalog_name = ctx.current_catalog().to_string();
606 let schema_name = ctx.current_schema();
607 let alter_expr = AlterTableExpr {
608 catalog_name: catalog_name.clone(),
609 schema_name: schema_name.clone(),
610 table_name: table_name.to_string(),
611 kind: Some(Kind::ModifyColumnTypes(ModifyColumnTypes {
612 modify_column_types: column_names
613 .iter()
614 .map(|column_name| ModifyColumnType {
615 column_name: column_name.clone(),
616 target_type: ColumnDataType::Float64 as i32,
617 target_type_extension: None,
618 })
619 .collect(),
620 })),
621 };
622
623 if let Err(err) = self
624 .statement_executor
625 .alter_table_inner(alter_expr, ctx.clone())
626 .await
627 {
628 let table = self
629 .catalog_manager
630 .table(&catalog_name, &schema_name, table_name, None)
631 .await
632 .map_err(servers::error::Error::from)?;
633 let alter_already_applied = table
634 .map(|table| {
635 let table_schema = table.schema();
636 column_names.iter().all(|column_name| {
637 table_schema
638 .column_schema_by_name(column_name)
639 .and_then(|table_col| {
640 ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
641 .ok()
642 .map(|wrapper| wrapper.datatype())
643 })
644 == Some(ColumnDataType::Float64)
645 })
646 })
647 .unwrap_or(false);
648
649 if alter_already_applied {
650 return Ok(());
651 }
652
653 warn!(
654 table_name,
655 columns = ?column_names,
656 error = %err,
657 "failed to widen trace columns before insert"
658 );
659
660 return Err(wrap_trace_alter_failure(err));
661 }
662
663 Ok(())
664 }
665
    /// Aligns the column types of each insert request with the types observed in its rows
    /// and with the existing table schema, before the request is written.
    ///
    /// For every insert in `requests`, per column:
    /// 1. gather the types involved: the request schema's declared type, the type of each
    ///    row value (as reported by `trace_value_datatype`), the column's type in the
    ///    existing table (if the table and column exist) and the fixed type returned by
    ///    `trace_semconv_fixed_type` (if any);
    /// 2. ask `choose_trace_reconcile_decision` for a target type and whether the table
    ///    has to be altered;
    /// 3. queue a rewrite of the request column and, if needed, an alter of the table column.
    ///
    /// The queued rewrites are then validated, the queued table columns are altered via
    /// `alter_trace_table_columns_to_float64`, and only afterwards are the request schema
    /// and row values rewritten in place.
    ///
    /// # Errors
    ///
    /// Returns the catalog lookup error, the error built by `enrich_trace_reconcile_error`
    /// when no decision can be made, a validation or alter error, or `InvalidParameter`
    /// when a value cannot be coerced to the target type.
    async fn reconcile_trace_column_types(
        &self,
        requests: &mut RowInsertRequests,
        ctx: &QueryContextRef,
    ) -> ServerResult<()> {
        let catalog = ctx.current_catalog();
        let schema = ctx.current_schema();

        for req in &mut requests.inserts {
            // `None` when the target table does not exist.
            let table = self
                .catalog_manager
                .table(catalog, &schema, &req.table_name, None)
                .await?;

            // Requests without row data have nothing to reconcile.
            let Some(rows) = req.rows.as_mut() else {
                continue;
            };

            let table_schema = table.map(|table| table.schema());
            // Request columns whose declared type and/or values must be rewritten.
            let mut pending_rewrites = Vec::new();
            // Table columns that must be altered before the insert; kept free of duplicates.
            let mut pending_alter_columns = Vec::new();

            for (col_idx, col_schema) in rows.schema.iter().enumerate() {
                // Columns whose declared type is not a known `ColumnDataType` are skipped.
                let Some(current_type) = ColumnDataType::try_from(col_schema.datatype).ok() else {
                    continue;
                };

                // Start with the declared type, then add the type of every value present
                // in this column.
                let mut observed_types = Vec::new();
                push_observed_trace_type(&mut observed_types, current_type);

                for row in &rows.rows {
                    // Rows that are too short or hold no value for this column are ignored.
                    let Some(value) = row
                        .values
                        .get(col_idx)
                        .and_then(|value| value.value_data.as_ref())
                    else {
                        continue;
                    };

                    let Some(value_type) = trace_value_datatype(value) else {
                        continue;
                    };
                    push_observed_trace_type(&mut observed_types, value_type);
                }

                // Type of the same-named column in the existing table, if both exist and
                // the type converts to a `ColumnDataType`.
                let existing_type = table_schema
                    .as_ref()
                    .and_then(|schema| schema.column_schema_by_name(&col_schema.column_name))
                    .and_then(|table_col| {
                        ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
                            .ok()
                            .map(|wrapper| wrapper.datatype())
                    });
                let fixed_type = trace_semconv_fixed_type(&col_schema.column_name);

                // Skip the column when nothing about it can require reconciliation: no
                // observed type is a candidate, the existing table column (if any) is not
                // a candidate either, and there is no fixed type for the column.
                if !observed_types
                    .iter()
                    .copied()
                    .any(is_trace_reconcile_candidate_type)
                    && existing_type
                        .map(|datatype| !is_trace_reconcile_candidate_type(datatype))
                        .unwrap_or(true)
                    && fixed_type.is_none()
                {
                    continue;
                }

                // `Ok(None)` means no action for this column. The original error is
                // replaced by one that carries the table, column and type details.
                let Some(decision) = choose_trace_reconcile_decision(
                    &col_schema.column_name,
                    &observed_types,
                    existing_type,
                )
                .map_err(|_| {
                    enrich_trace_reconcile_error(
                        &req.table_name,
                        &col_schema.column_name,
                        &observed_types,
                        existing_type,
                        fixed_type,
                    )
                })?
                else {
                    continue;
                };
                let target_type = decision.target_type();

                // Nothing to do when the table needs no alter and the request already
                // matches the target type everywhere.
                if !decision.requires_alter()
                    && observed_types
                        .iter()
                        .all(|observed| *observed == target_type)
                    && col_schema.datatype == target_type as i32
                {
                    continue;
                }

                if decision.requires_alter()
                    && !pending_alter_columns.contains(&col_schema.column_name)
                {
                    pending_alter_columns.push(col_schema.column_name.clone());
                }

                pending_rewrites.push(PendingTraceColumnRewrite {
                    col_idx,
                    target_type,
                    column_name: col_schema.column_name.clone(),
                });
            }

            if pending_rewrites.is_empty() {
                continue;
            }

            // Validation runs before the table is altered and before any value is rewritten.
            validate_trace_column_rewrites(&rows.rows, &pending_rewrites, &req.table_name)?;

            if !pending_alter_columns.is_empty() {
                self.alter_trace_table_columns_to_float64(
                    ctx,
                    &req.table_name,
                    &pending_alter_columns,
                )
                .await?;
            }

            // Update the declared column types in the request schema.
            for pending_rewrite in &pending_rewrites {
                rows.schema[pending_rewrite.col_idx].datatype = pending_rewrite.target_type as i32;
            }

            // Coerce every value whose type differs from its column's target type.
            for row in &mut rows.rows {
                for pending_rewrite in &pending_rewrites {
                    let Some(value) = row.values.get_mut(pending_rewrite.col_idx) else {
                        continue;
                    };
                    // Values that are absent, or for which `trace_value_datatype` returns
                    // `None`, are left untouched.
                    let Some(request_type) =
                        value.value_data.as_ref().and_then(trace_value_datatype)
                    else {
                        continue;
                    };
                    if request_type == pending_rewrite.target_type {
                        continue;
                    }

                    value.value_data = coerce_value_data(
                        &value.value_data,
                        pending_rewrite.target_type,
                        request_type,
                    )
                    .map_err(|_| {
                        error::InvalidParameterSnafu {
                            reason: format!(
                                "failed to coerce trace column '{}' in table '{}' from {:?} to {:?}",
                                pending_rewrite.column_name,
                                req.table_name,
                                request_type,
                                pending_rewrite.target_type
                            ),
                        }
                        .build()
                    })?;
                }
            }
        }

        Ok(())
    }
839}
840
841fn wrap_trace_alter_failure<E>(err: E) -> servers::error::Error
843where
844 E: ErrorExt + Send + Sync + 'static,
845{
846 error::ExecuteGrpcQuerySnafu.into_error(BoxedError::new(err))
847}
848
849fn trace_conventions(request: &ExportTraceServiceRequest) -> String {
854 let mut seen: Option<&str> = None;
855 let mut mixed = false;
856
857 for resource_spans in &request.resource_spans {
858 let urls = std::iter::once(resource_spans.schema_url.as_str()).chain(
859 resource_spans
860 .scope_spans
861 .iter()
862 .map(|s| s.schema_url.as_str()),
863 );
864 for url in urls {
865 if url.is_empty() {
866 continue;
867 }
868 match seen {
869 None => seen = Some(url),
870 Some(prev) if prev == url => {}
871 Some(_) => {
872 mixed = true;
873 break;
874 }
875 }
876 }
877 if mixed {
878 break;
879 }
880 }
881
882 if mixed {
883 SEMANTIC_VALUE_MIXED.to_string()
884 } else {
885 seen.map(str::to_string)
886 .unwrap_or_else(|| SEMANTIC_VALUE_UNKNOWN.to_string())
887 }
888}
889
#[cfg(test)]
mod tests {
    use common_error::ext::ErrorExt;
    use common_error::status_code::StatusCode;
    use opentelemetry_proto::tonic::trace::v1::{ResourceSpans, ScopeSpans};
    use servers::query_handler::TraceIngestOutcome;

    use super::{
        ChunkFailureReaction, ExportTraceServiceRequest, Instance, TRACE_FAILURE_MESSAGE_LIMIT,
        trace_conventions, wrap_trace_alter_failure,
    };
    use crate::metrics::OTLP_TRACES_FAILURE_COUNT;

    /// Builds a `ResourceSpans` with the given resource-level schema URL and one
    /// `ScopeSpans` per entry of `scope_urls`.
    fn resource_spans(resource_url: &str, scope_urls: &[&str]) -> ResourceSpans {
        ResourceSpans {
            schema_url: resource_url.to_string(),
            scope_spans: scope_urls
                .iter()
                .map(|u| ScopeSpans {
                    schema_url: u.to_string(),
                    ..Default::default()
                })
                .collect(),
            ..Default::default()
        }
    }

    #[test]
    fn test_classify_trace_chunk_failure() {
        let cases = [
            // Errors handled by retrying span by span.
            (
                StatusCode::InvalidArguments,
                ChunkFailureReaction::RetryPerSpan,
            ),
            (StatusCode::InvalidSyntax, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::Unsupported, ChunkFailureReaction::RetryPerSpan),
            (
                StatusCode::TableColumnNotFound,
                ChunkFailureReaction::RetryPerSpan,
            ),
            (StatusCode::TableNotFound, ChunkFailureReaction::RetryPerSpan),
            // Explicitly discarded.
            (
                StatusCode::DatabaseNotFound,
                ChunkFailureReaction::DiscardChunk,
            ),
            // Propagated to the caller.
            (
                StatusCode::DeadlineExceeded,
                ChunkFailureReaction::Propagate,
            ),
            (StatusCode::Cancelled, ChunkFailureReaction::Propagate),
            (
                StatusCode::StorageUnavailable,
                ChunkFailureReaction::Propagate,
            ),
            (StatusCode::Internal, ChunkFailureReaction::Propagate),
            (StatusCode::RegionNotReady, ChunkFailureReaction::Propagate),
            (
                StatusCode::TableUnavailable,
                ChunkFailureReaction::Propagate,
            ),
            (StatusCode::RegionBusy, ChunkFailureReaction::Propagate),
            (
                StatusCode::RuntimeResourcesExhausted,
                ChunkFailureReaction::Propagate,
            ),
        ];

        for (status, expected) in cases {
            // Rendered up front so a failing case names the status that was classified.
            let description = format!("{status:?}");
            assert_eq!(
                Instance::classify_trace_chunk_failure(status),
                expected,
                "unexpected reaction for status {description}"
            );
        }
    }

    #[test]
    fn test_classify_trace_chunk_failure_defaults_to_discard() {
        assert_eq!(
            Instance::classify_trace_chunk_failure(StatusCode::Unknown),
            ChunkFailureReaction::DiscardChunk
        );
    }

    #[test]
    fn test_classify_trace_span_failure() {
        assert!(Instance::should_propagate_trace_span_failure(
            StatusCode::DeadlineExceeded
        ));
        assert!(Instance::should_propagate_trace_span_failure(
            StatusCode::StorageUnavailable
        ));
        assert!(!Instance::should_propagate_trace_span_failure(
            StatusCode::InvalidArguments
        ));
    }

    #[test]
    fn test_add_trace_write_cost() {
        let mut outcome = TraceIngestOutcome::default();
        Instance::add_trace_write_cost(&mut outcome, 3);
        Instance::add_trace_write_cost(&mut outcome, 5);
        assert_eq!(outcome.write_cost, 8);
    }

    #[test]
    fn test_finish_trace_failure_message() {
        let message = Instance::finish_trace_failure_message(
            3,
            2,
            vec!["Rejected span trace:span (InvalidArguments)".to_string()],
        )
        .unwrap();
        assert!(message.contains("Accepted 3 spans, rejected 2 spans"));
        assert!(message.contains("Rejected span trace:span"));

        assert_eq!(Instance::finish_trace_failure_message(2, 0, vec![]), None);
    }

    #[test]
    fn test_finish_trace_failure_message_without_detail_messages() {
        assert_eq!(
            Instance::finish_trace_failure_message(0, 2, vec![]),
            Some("Accepted 0 spans, rejected 2 spans".to_string())
        );
    }

    #[test]
    fn test_push_trace_failure_message_increments_labeled_counter() {
        // A label used by no other test keeps the counter delta deterministic.
        let label = "retry_per_span_counter_test";
        let initial = OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).get();
        let mut messages = Vec::new();

        Instance::push_trace_failure_message(
            &mut messages,
            label,
            "Chunk fallback triggered by InvalidArguments".to_string(),
        );

        assert_eq!(messages.len(), 1);
        assert_eq!(
            OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).get(),
            initial + 1
        );
    }

    #[test]
    fn test_push_trace_failure_message_caps_recorded_messages() {
        let label = "retry_per_span_limit_test";
        let mut messages = Vec::new();

        // Push one message more than the limit; only the first `limit` must be kept.
        for idx in 0..=TRACE_FAILURE_MESSAGE_LIMIT {
            Instance::push_trace_failure_message(&mut messages, label, format!("failure-{idx}"));
        }

        let expected: Vec<String> = (0..TRACE_FAILURE_MESSAGE_LIMIT)
            .map(|idx| format!("failure-{idx}"))
            .collect();
        assert_eq!(messages.len(), TRACE_FAILURE_MESSAGE_LIMIT);
        assert_eq!(messages, expected);
    }

    #[test]
    fn test_wrap_trace_alter_failure_preserves_status_code() {
        let err = wrap_trace_alter_failure(
            servers::error::TableNotFoundSnafu {
                catalog: "greptime".to_string(),
                schema: "public".to_string(),
                table: "trace_type_missing".to_string(),
            }
            .build(),
        );

        assert_eq!(err.status_code(), StatusCode::TableNotFound);
    }

    #[test]
    fn test_trace_conventions() {
        // No schema URL anywhere.
        let unknown = ExportTraceServiceRequest::default();
        assert_eq!(trace_conventions(&unknown), "unknown");

        // The same URL on every scope.
        let url = "https://opentelemetry.io/schemas/1.27.0";
        let single = ExportTraceServiceRequest {
            resource_spans: vec![resource_spans("", &[url, url])],
        };
        assert_eq!(trace_conventions(&single), url);

        // URL only at the resource level.
        let resource_level = ExportTraceServiceRequest {
            resource_spans: vec![resource_spans(url, &[""])],
        };
        assert_eq!(trace_conventions(&resource_level), url);

        // Two different URLs.
        let conflicting = ExportTraceServiceRequest {
            resource_spans: vec![resource_spans(
                "",
                &[url, "https://opentelemetry.io/schemas/1.30.0"],
            )],
        };
        assert_eq!(trace_conventions(&conflicting), "mixed");
    }
}