// frontend/instance/otlp.rs — OTLP (OpenTelemetry Protocol) ingest handlers.
1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use api::helper::ColumnDataTypeWrapper;
18use api::v1::{ColumnDataType, RowInsertRequests};
19use async_trait::async_trait;
20use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
21use client::Output;
22use common_error::ext::{BoxedError, ErrorExt};
23use common_error::status_code::StatusCode;
24use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
25use common_telemetry::tracing;
26use itertools::Itertools;
27use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
28use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
29use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
30use pipeline::{GreptimePipelineParams, PipelineWay};
31use servers::error::{self, AuthSnafu, Result as ServerResult};
32use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
33use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
34use servers::otlp;
35use servers::otlp::trace::TraceAuxData;
36use servers::otlp::trace::coerce::{
37    coerce_value_data, is_supported_trace_coercion, resolve_new_trace_column_type,
38    trace_value_datatype,
39};
40use servers::otlp::trace::span::{TraceSpan, TraceSpanGroup};
41use servers::query_handler::{
42    OpenTelemetryProtocolHandler, PipelineHandlerRef, TraceIngestOutcome,
43};
44use session::context::QueryContextRef;
45use snafu::ResultExt;
46use table::requests::{OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM};
47
48use crate::instance::Instance;
49use crate::metrics::{
50    OTLP_LOGS_ROWS, OTLP_METRICS_ROWS, OTLP_TRACES_FAILURE_COUNT, OTLP_TRACES_ROWS,
51};
52
/// Number of spans written per insert request. Bounds the blast radius of a
/// single failed chunk write and the amount of work redone on per-span retry.
const TRACE_INGEST_CHUNK_SIZE: usize = 64;
/// Maximum number of failure detail strings kept for the response summary;
/// further failures are still counted in metrics but their text is dropped.
const TRACE_FAILURE_MESSAGE_LIMIT: usize = 4;
55
/// How a failed chunk-level trace insert is handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChunkFailureReaction {
    RetryPerSpan,
    DiscardChunk,
    Propagate,
}

impl ChunkFailureReaction {
    /// Stable label for the per-reaction failure-count metric series.
    fn as_metric_label(self) -> &'static str {
        match self {
            ChunkFailureReaction::DiscardChunk => "discard_chunk",
            ChunkFailureReaction::Propagate => "propagate_failure",
            ChunkFailureReaction::RetryPerSpan => "retry_per_span",
        }
    }
}
72
/// Borrowed per-request configuration shared by every chunk insert, so the
/// chunk loop does not re-clone pipeline settings for each batch.
struct TraceChunkIngestContext<'a> {
    pipeline_handler: PipelineHandlerRef,
    pipeline: &'a PipelineWay,
    pipeline_params: &'a GreptimePipelineParams,
    table_name: &'a str,
    // True when the pipeline is `PipelineWay::OtlpTraceDirectV1`; that model
    // goes through column-type reconciliation before inserting.
    is_trace_v1_model: bool,
}
80
/// Mutable accumulator threaded through one whole trace ingest call.
struct TraceIngestState {
    // Auxiliary-table rows gathered only from spans whose main-table write
    // succeeded.
    aux_data: TraceAuxData,
    // Accepted/rejected span counts, accumulated write cost, and the final
    // error message returned to the caller.
    outcome: TraceIngestOutcome,
    // Bounded list of human-readable failure details
    // (capped at TRACE_FAILURE_MESSAGE_LIMIT entries).
    failure_messages: Vec<String>,
}
86
#[async_trait]
impl OpenTelemetryProtocolHandler for Instance {
    /// Handles an OTLP metrics export request.
    ///
    /// Checks OTLP permission, runs protocol interceptors, converts the
    /// request into row inserts, then writes through either the plain
    /// row-insert path or the metric-engine path depending on the metric
    /// context.
    #[tracing::instrument(skip_all)]
    async fn metrics(
        &self,
        request: ExportMetricsServiceRequest,
        ctx: QueryContextRef,
    ) -> ServerResult<Output> {
        // Reject callers without OTLP write permission before doing any work.
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
            .check_permission(ctx.current_user(), PermissionReq::Otlp)
            .context(AuthSnafu)?;

        let interceptor_ref = self
            .plugins
            .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
        interceptor_ref.pre_execute(ctx.clone())?;

        // Collect every metric name in the request; legacy-path detection is
        // based on these names.
        let input_names = request
            .resource_metrics
            .iter()
            .flat_map(|r| r.scope_metrics.iter())
            .flat_map(|s| s.metrics.iter().map(|m| &m.name))
            .collect::<Vec<_>>();

        // See [`OtlpMetricCtx`] for details
        let is_legacy = self.check_otlp_legacy(&input_names, ctx.clone()).await?;

        let mut metric_ctx = ctx
            .protocol_ctx()
            .get_otlp_metric_ctx()
            .cloned()
            .unwrap_or_default();
        metric_ctx.is_legacy = is_legacy;

        let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request, &mut metric_ctx)?;
        OTLP_METRICS_ROWS.inc_by(rows as u64);

        // Non-legacy requests get a prom-compat extension on a cloned context
        // so downstream insert handling can pick the matching table options.
        let ctx = if !is_legacy {
            let mut c = (*ctx).clone();
            c.set_extension(OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM.to_string());
            Arc::new(c)
        } else {
            ctx
        };

        // If the user uses the legacy path, it is by default without metric engine.
        if metric_ctx.is_legacy || !metric_ctx.with_metric_engine {
            self.handle_row_inserts(requests, ctx, false, false)
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)
        } else {
            // Metric-engine path: rows are routed into a shared physical
            // table, honoring an explicit one from the query context if set.
            let physical_table = ctx
                .extension(PHYSICAL_TABLE_PARAM)
                .unwrap_or(GREPTIME_PHYSICAL_TABLE)
                .to_string();
            self.handle_metric_row_inserts(requests, ctx, physical_table.clone())
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)
        }
    }

    /// Handles an OTLP traces export request.
    ///
    /// After permission and interceptor checks, parses the request into span
    /// groups and delegates to the chunked ingest path
    /// ([`Instance::ingest_trace_spans`]).
    #[tracing::instrument(skip_all)]
    async fn traces(
        &self,
        pipeline_handler: PipelineHandlerRef,
        request: ExportTraceServiceRequest,
        pipeline: PipelineWay,
        pipeline_params: GreptimePipelineParams,
        table_name: String,
        ctx: QueryContextRef,
    ) -> ServerResult<TraceIngestOutcome> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
            .check_permission(ctx.current_user(), PermissionReq::Otlp)
            .context(AuthSnafu)?;

        let interceptor_ref = self
            .plugins
            .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
        interceptor_ref.pre_execute(ctx.clone())?;

        let spans = otlp::trace::span::parse(request);
        self.ingest_trace_spans(
            pipeline_handler,
            &pipeline,
            &pipeline_params,
            table_name,
            spans,
            ctx,
        )
        .await
    }

    /// Handles an OTLP logs export request.
    ///
    /// Converts the request through the configured pipeline into per-context
    /// insert batches and writes each one, counting rows toward the OTLP log
    /// metric only when the batch inserted successfully.
    #[tracing::instrument(skip_all)]
    async fn logs(
        &self,
        pipeline_handler: PipelineHandlerRef,
        request: ExportLogsServiceRequest,
        pipeline: PipelineWay,
        pipeline_params: GreptimePipelineParams,
        table_name: String,
        ctx: QueryContextRef,
    ) -> ServerResult<Vec<Output>> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
            .check_permission(ctx.current_user(), PermissionReq::Otlp)
            .context(AuthSnafu)?;

        let interceptor_ref = self
            .plugins
            .get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
        interceptor_ref.pre_execute(ctx.clone())?;

        let opt_req = otlp::logs::to_grpc_insert_requests(
            request,
            pipeline,
            pipeline_params,
            table_name,
            &ctx,
            pipeline_handler,
        )
        .await?;

        let mut outputs = vec![];

        for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {
            // Pre-compute the row count so the metric can be bumped only on a
            // successful insert.
            let cnt = requests
                .inserts
                .iter()
                .filter_map(|r| r.rows.as_ref().map(|r| r.rows.len()))
                .sum::<usize>();

            let o = self
                .handle_log_inserts(requests, temp_ctx)
                .await
                .inspect(|_| OTLP_LOGS_ROWS.inc_by(cnt as u64))
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)?;
            outputs.push(o);
        }

        Ok(outputs)
    }
}
237
impl Instance {
    /// Ingest OTLP trace spans with chunk-level writes and span-level fallback on
    /// deterministic chunk failures.
    ///
    /// Spans are written in chunks of [`TRACE_INGEST_CHUNK_SIZE`]; each failed
    /// chunk is retried per span, discarded, or propagated depending on its
    /// status code (see [`Self::classify_trace_chunk_failure`]). Auxiliary
    /// trace tables are updated at the end, only from spans whose main-table
    /// writes succeeded.
    async fn ingest_trace_spans(
        &self,
        pipeline_handler: PipelineHandlerRef,
        pipeline: &PipelineWay,
        pipeline_params: &GreptimePipelineParams,
        table_name: String,
        groups: Vec<TraceSpanGroup>,
        ctx: QueryContextRef,
    ) -> ServerResult<TraceIngestOutcome> {
        let is_trace_v1_model = matches!(pipeline, PipelineWay::OtlpTraceDirectV1);
        let ingest_ctx = TraceChunkIngestContext {
            pipeline_handler,
            pipeline,
            pipeline_params,
            table_name: &table_name,
            is_trace_v1_model,
        };
        let mut ingest_state = TraceIngestState {
            aux_data: TraceAuxData::default(),
            outcome: TraceIngestOutcome::default(),
            failure_messages: Vec::new(),
        };

        for group in groups {
            // Materialize fixed-size owned chunks so each insert stays bounded
            // and a failed chunk can be retried span by span.
            let chunks = group
                .spans
                .into_iter()
                .chunks(TRACE_INGEST_CHUNK_SIZE)
                .into_iter()
                .map(|chunk| chunk.collect::<Vec<_>>())
                .collect::<Vec<_>>();
            for chunk in chunks {
                self.ingest_trace_chunk(&ingest_ctx, chunk, ctx.clone(), &mut ingest_state)
                    .await?;
            }
        }

        OTLP_TRACES_ROWS.inc_by(ingest_state.outcome.accepted_spans as u64);

        if !ingest_state.aux_data.is_empty() {
            // Auxiliary trace tables are derived from spans whose main-table
            // writes are already confirmed, so they never create new accepted
            // spans and they do not affect rejected span counts.
            let (aux_requests, _) = otlp::trace::to_grpc_insert_requests_for_aux_tables(
                std::mem::take(&mut ingest_state.aux_data),
                ingest_ctx.pipeline,
                ingest_ctx.table_name,
            )?;

            if !aux_requests.inserts.is_empty() {
                match self
                    .insert_trace_requests(aux_requests, ingest_ctx.is_trace_v1_model, ctx)
                    .await
                {
                    Ok(output) => {
                        Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
                    }
                    Err(err) => {
                        // An aux-table failure is reported in the summary but
                        // does not fail the request: main-table data for these
                        // spans is already committed.
                        Self::push_trace_failure_message(
                            &mut ingest_state.failure_messages,
                            "aux_table_update_failed",
                            format!(
                                "Auxiliary trace tables were not fully updated ({})",
                                err.status_code().as_ref()
                            ),
                        );
                    }
                }
            }
        }

        ingest_state.outcome.error_message = Self::finish_trace_failure_message(
            ingest_state.outcome.accepted_spans,
            ingest_state.outcome.rejected_spans,
            ingest_state.failure_messages,
        );

        Ok(ingest_state.outcome)
    }

    /// Ingest one owned trace chunk so successful spans can be moved into the
    /// accepted set without extra cloning.
    ///
    /// Tries a single chunk-level insert first; on failure the reaction is
    /// chosen by [`Self::classify_trace_chunk_failure`]: per-span retry,
    /// discard with rejection counting, or propagation of the error.
    async fn ingest_trace_chunk(
        &self,
        ingest_ctx: &TraceChunkIngestContext<'_>,
        chunk: Vec<TraceSpan>,
        ctx: QueryContextRef,
        ingest_state: &mut TraceIngestState,
    ) -> ServerResult<()> {
        // Try the fast path first so healthy batches keep their original
        // throughput and write amplification stays low.
        let (requests, chunk_rows) = otlp::trace::to_grpc_insert_requests_from_spans(
            &chunk,
            ingest_ctx.pipeline,
            ingest_ctx.pipeline_params,
            ingest_ctx.table_name,
            &ctx,
            ingest_ctx.pipeline_handler.clone(),
        )?;

        match self
            .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
            .await
        {
            Ok(output) => {
                Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
                ingest_state.outcome.accepted_spans += chunk_rows;
                // Record accepted spans for the later auxiliary-table pass.
                for span in &chunk {
                    ingest_state.aux_data.observe_span(span);
                }
            }
            Err(err) => match Self::classify_trace_chunk_failure(err.status_code()) {
                ChunkFailureReaction::RetryPerSpan => {
                    Self::push_trace_failure_message(
                        &mut ingest_state.failure_messages,
                        ChunkFailureReaction::RetryPerSpan.as_metric_label(),
                        format!("Chunk fallback triggered by {}", err.status_code().as_ref()),
                    );
                    // Only deterministic failures are retried span by span.
                    // This includes schemaless table or column creation paths for
                    // trace ingestion. Ambiguous failures are handled below
                    // without retrying because the chunk may already have been
                    // ingested.
                    self.ingest_trace_chunk_span_by_span(
                        ingest_ctx,
                        chunk,
                        ctx.clone(),
                        ingest_state,
                    )
                    .await?;
                }
                ChunkFailureReaction::DiscardChunk => {
                    ingest_state.outcome.rejected_spans += chunk.len();
                    Self::push_trace_failure_message(
                        &mut ingest_state.failure_messages,
                        ChunkFailureReaction::DiscardChunk.as_metric_label(),
                        format!(
                            "Discarded {} spans after ambiguous chunk failure ({})",
                            chunk.len(),
                            err.status_code().as_ref()
                        ),
                    );
                    // TODO(shuiyisong): Add an idempotent retry-safe recovery path for
                    // ambiguous chunk failures such as timeout-like errors.
                }
                // Retryable or ambiguous failures must fail the request instead of
                // becoming partial success. This path is not retry-safe because the
                // chunk may already have been committed before the error surfaced.
                ChunkFailureReaction::Propagate => {
                    Self::push_trace_failure_message(
                        &mut ingest_state.failure_messages,
                        ChunkFailureReaction::Propagate.as_metric_label(),
                        format!(
                            "Propagating retryable chunk failure ({})",
                            err.status_code().as_ref()
                        ),
                    );
                    return Err(err);
                }
            },
        }

        Ok(())
    }

    /// Retry spans one by one only after a deterministic chunk failure.
    ///
    /// Spans that still fail deterministically are counted as rejected;
    /// failures classified as `Propagate` abort the whole request.
    async fn ingest_trace_chunk_span_by_span(
        &self,
        ingest_ctx: &TraceChunkIngestContext<'_>,
        chunk: Vec<TraceSpan>,
        ctx: QueryContextRef,
        ingest_state: &mut TraceIngestState,
    ) -> ServerResult<()> {
        for span in chunk {
            // Build a single-span insert request without cloning the span.
            let (requests, rows) = otlp::trace::to_grpc_insert_requests_from_spans(
                std::slice::from_ref(&span),
                ingest_ctx.pipeline,
                ingest_ctx.pipeline_params,
                ingest_ctx.table_name,
                &ctx,
                ingest_ctx.pipeline_handler.clone(),
            )?;

            match self
                .insert_trace_requests(requests, ingest_ctx.is_trace_v1_model, ctx.clone())
                .await
            {
                Ok(output) => {
                    Self::add_trace_write_cost(&mut ingest_state.outcome, output.meta.cost);
                    ingest_state.outcome.accepted_spans += rows;
                    ingest_state.aux_data.observe_span(&span);
                }
                Err(err) => {
                    if Self::should_propagate_trace_span_failure(err.status_code()) {
                        Self::push_trace_failure_message(
                            &mut ingest_state.failure_messages,
                            ChunkFailureReaction::Propagate.as_metric_label(),
                            format!(
                                "Propagating retryable span failure for {}:{} ({})",
                                span.trace_id,
                                span.span_id,
                                err.status_code().as_ref()
                            ),
                        );
                        return Err(err);
                    }

                    ingest_state.outcome.rejected_spans += 1;
                    Self::push_trace_failure_message(
                        &mut ingest_state.failure_messages,
                        "span_rejected",
                        format!(
                            "Rejected span {}:{} ({})",
                            span.trace_id,
                            span.span_id,
                            err.status_code().as_ref()
                        ),
                    );
                }
            }
        }

        Ok(())
    }

    /// Reconcile and insert one trace request batch.
    ///
    /// Trace v1-model batches first go through column-type reconciliation and
    /// the trace insert path; every other pipeline uses the log insert path.
    async fn insert_trace_requests(
        &self,
        mut requests: RowInsertRequests,
        is_trace_v1_model: bool,
        ctx: QueryContextRef,
    ) -> ServerResult<Output> {
        if is_trace_v1_model {
            self.reconcile_trace_column_types(&mut requests, &ctx)
                .await?;
            self.handle_trace_inserts(requests, ctx)
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)
        } else {
            self.handle_log_inserts(requests, ctx)
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)
        }
    }

    /// Maps a chunk-insert failure status code to a reaction: deterministic
    /// client-side failures retry per span, retryable/in-flight failures are
    /// propagated, and the rest discard the chunk.
    fn classify_trace_chunk_failure(status: StatusCode) -> ChunkFailureReaction {
        match status {
            StatusCode::InvalidArguments
            | StatusCode::InvalidSyntax
            | StatusCode::Unsupported
            | StatusCode::TableNotFound
            | StatusCode::TableColumnNotFound => ChunkFailureReaction::RetryPerSpan,
            StatusCode::DatabaseNotFound => ChunkFailureReaction::DiscardChunk,
            StatusCode::Cancelled | StatusCode::DeadlineExceeded => ChunkFailureReaction::Propagate,
            _ if status.is_retryable() => ChunkFailureReaction::Propagate,
            _ => ChunkFailureReaction::DiscardChunk,
        }
    }

    /// A span-level failure is propagated exactly when the chunk-level
    /// classification for the same status code would propagate.
    fn should_propagate_trace_span_failure(status: StatusCode) -> bool {
        matches!(
            Self::classify_trace_chunk_failure(status),
            ChunkFailureReaction::Propagate
        )
    }

    /// Accumulates the write cost reported by one insert into the outcome.
    fn add_trace_write_cost(outcome: &mut TraceIngestOutcome, cost: usize) {
        outcome.write_cost += cost;
    }

    /// Counts a failure in the OTLP trace failure metric and records its
    /// message, keeping at most [`TRACE_FAILURE_MESSAGE_LIMIT`] messages.
    /// Once the list is full, a debug log notes that details are suppressed
    /// (the metric is still incremented for every call).
    fn push_trace_failure_message(messages: &mut Vec<String>, label: &str, message: String) {
        OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]).inc();

        if messages.len() < TRACE_FAILURE_MESSAGE_LIMIT {
            messages.push(message);
        } else if messages.len() == TRACE_FAILURE_MESSAGE_LIMIT {
            tracing::debug!(
                label,
                limit = TRACE_FAILURE_MESSAGE_LIMIT,
                "Trace ingest failure message limit reached; suppressing additional failure details"
            );
        }
    }

    /// Builds the final error summary, or `None` when no spans were rejected
    /// and no failure details were recorded.
    fn finish_trace_failure_message(
        accepted_spans: usize,
        rejected_spans: usize,
        messages: Vec<String>,
    ) -> Option<String> {
        if rejected_spans == 0 && messages.is_empty() {
            return None;
        }

        let mut summary = format!(
            "Accepted {} spans, rejected {} spans",
            accepted_spans, rejected_spans
        );

        if !messages.is_empty() {
            summary.push_str(": ");
            summary.push_str(&messages.join("; "));
        }

        Some(summary)
    }

    /// Picks the final datatype for one trace column.
    ///
    /// Existing table schema is authoritative when present. Otherwise we resolve the
    /// request-local observed types using the shared trace coercion rules.
    ///
    /// Returns an `InvalidParameter` error when the observed types cannot all
    /// be kept as, or coerced to, the chosen type.
    fn choose_trace_target_type(
        observed_types: &[ColumnDataType],
        existing_type: Option<ColumnDataType>,
    ) -> ServerResult<Option<ColumnDataType>> {
        let Some(existing_type) = existing_type else {
            return resolve_new_trace_column_type(observed_types.iter().copied()).map_err(|_| {
                error::InvalidParameterSnafu {
                    reason: "unsupported trace type mix".to_string(),
                }
                .build()
            });
        };

        if observed_types.iter().copied().all(|request_type| {
            request_type == existing_type
                || is_supported_trace_coercion(request_type, existing_type)
        }) {
            Ok(Some(existing_type))
        } else {
            error::InvalidParameterSnafu {
                reason: "unsupported trace type mix".to_string(),
            }
            .fail()
        }
    }

    /// Coerce request column types and values to match the existing table schema
    /// for compatible type pairs. Existing table schema wins when present;
    /// otherwise the full request batch decides a stable target type.
    async fn reconcile_trace_column_types(
        &self,
        requests: &mut RowInsertRequests,
        ctx: &QueryContextRef,
    ) -> ServerResult<()> {
        let catalog = ctx.current_catalog();
        let schema = ctx.current_schema();

        for req in &mut requests.inserts {
            // The table may not exist yet; a missing table simply means there
            // is no authoritative schema to reconcile against.
            let table = self
                .catalog_manager
                .table(catalog, &schema, &req.table_name, None)
                .await?;

            let Some(rows) = req.rows.as_mut() else {
                continue;
            };

            let table_schema = table.map(|table| table.schema());
            // (column index, target type, column name) triples to rewrite.
            let mut pending_coercions = Vec::new();

            for (col_idx, col_schema) in rows.schema.iter().enumerate() {
                let Some(current_type) = ColumnDataType::try_from(col_schema.datatype).ok() else {
                    continue;
                };

                let mut observed_types = Vec::new();
                push_observed_trace_type(&mut observed_types, current_type);

                // Scan the full request first so the final type decision is not affected
                // by row order inside the batch.
                for row in &rows.rows {
                    let Some(value) = row
                        .values
                        .get(col_idx)
                        .and_then(|value| value.value_data.as_ref())
                    else {
                        continue;
                    };

                    let Some(value_type) = trace_value_datatype(value) else {
                        continue;
                    };
                    push_observed_trace_type(&mut observed_types, value_type);
                }

                let existing_type = table_schema
                    .as_ref()
                    .and_then(|schema| schema.column_schema_by_name(&col_schema.column_name))
                    .and_then(|table_col| {
                        ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
                            .ok()
                            .map(|wrapper| wrapper.datatype())
                    });

                // Skip the column when neither the request nor the existing
                // table uses a reconcilable scalar type.
                if !observed_types
                    .iter()
                    .copied()
                    .any(is_trace_reconcile_candidate_type)
                    && existing_type
                        .map(|datatype| !is_trace_reconcile_candidate_type(datatype))
                        .unwrap_or(true)
                {
                    continue;
                }

                // Decide the final type once per column, then rewrite all affected cells
                // together in one row pass below.
                let Some(target_type) =
                    Self::choose_trace_target_type(&observed_types, existing_type).map_err(
                        |_| {
                            enrich_trace_reconcile_error(
                                &req.table_name,
                                &col_schema.column_name,
                                &observed_types,
                                existing_type,
                            )
                        },
                    )?
                else {
                    continue;
                };

                // Nothing to rewrite when the declared schema type and every
                // observed value type already equal the target.
                if observed_types
                    .iter()
                    .all(|observed| *observed == target_type)
                    && col_schema.datatype == target_type as i32
                {
                    continue;
                }

                pending_coercions.push((col_idx, target_type, col_schema.column_name.clone()));
            }

            if pending_coercions.is_empty() {
                continue;
            }

            // Update schema metadata before mutating row values so both stay in sync.
            for (col_idx, target_type, ..) in &pending_coercions {
                rows.schema[*col_idx].datatype = *target_type as i32;
            }

            // Apply all pending column rewrites in one row pass.
            for row in &mut rows.rows {
                for (col_idx, target_type, column_name) in &pending_coercions {
                    let Some(value) = row.values.get_mut(*col_idx) else {
                        continue;
                    };
                    let Some(request_type) =
                        value.value_data.as_ref().and_then(trace_value_datatype)
                    else {
                        continue;
                    };
                    if request_type == *target_type {
                        continue;
                    }

                    value.value_data = coerce_value_data(
                        &value.value_data,
                        *target_type,
                        request_type,
                    )
                    .map_err(|_| {
                        error::InvalidParameterSnafu {
                            reason: format!(
                                "failed to coerce trace column '{}' in table '{}' from {:?} to {:?}",
                                column_name, req.table_name, request_type, target_type
                            ),
                        }
                        .build()
                    })?;
                }
            }
        }

        Ok(())
    }
}
721
722fn enrich_trace_reconcile_error(
723    table_name: &str,
724    column_name: &str,
725    observed_types: &[ColumnDataType],
726    existing_type: Option<ColumnDataType>,
727) -> servers::error::Error {
728    let observed_types = observed_types
729        .iter()
730        .map(|datatype| format!("{datatype:?}"))
731        .collect::<Vec<_>>()
732        .join(", ");
733
734    error::InvalidParameterSnafu {
735        reason: match existing_type {
736            Some(existing_type) => format!(
737                "failed to reconcile trace column '{}' in table '{}' with observed types [{}] against existing {:?}",
738                column_name, table_name, observed_types, existing_type
739            ),
740            None => format!(
741                "failed to reconcile trace column '{}' in table '{}' with observed types [{}]",
742                column_name, table_name, observed_types
743            ),
744        },
745    }
746    .build()
747}
748
749/// Only these trace scalar types participate in reconciliation. Other column kinds
750/// such as JSON and binary keep their original write path and schema checks.
751fn is_trace_reconcile_candidate_type(datatype: ColumnDataType) -> bool {
752    matches!(
753        datatype,
754        ColumnDataType::String
755            | ColumnDataType::Boolean
756            | ColumnDataType::Int64
757            | ColumnDataType::Float64
758    )
759}
760
761/// Keeps the observed type list small without depending on enum ordering.
762fn push_observed_trace_type(observed_types: &mut Vec<ColumnDataType>, datatype: ColumnDataType) {
763    if !observed_types.contains(&datatype) {
764        observed_types.push(datatype);
765    }
766}
767
#[cfg(test)]
mod tests {
    use common_error::status_code::StatusCode;
    use servers::query_handler::TraceIngestOutcome;

    use super::{ChunkFailureReaction, Instance};
    use crate::metrics::OTLP_TRACES_FAILURE_COUNT;

    #[test]
    fn test_classify_trace_chunk_failure() {
        // Table-driven: each status code maps to the reaction taken when a
        // whole-chunk insert fails with that code.
        let cases = [
            (StatusCode::InvalidArguments, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::InvalidSyntax, ChunkFailureReaction::RetryPerSpan),
            (StatusCode::Unsupported, ChunkFailureReaction::RetryPerSpan),
            (
                StatusCode::TableColumnNotFound,
                ChunkFailureReaction::RetryPerSpan,
            ),
            (StatusCode::TableNotFound, ChunkFailureReaction::RetryPerSpan),
            (
                StatusCode::DatabaseNotFound,
                ChunkFailureReaction::DiscardChunk,
            ),
            (StatusCode::DeadlineExceeded, ChunkFailureReaction::Propagate),
            (StatusCode::Cancelled, ChunkFailureReaction::Propagate),
            (
                StatusCode::StorageUnavailable,
                ChunkFailureReaction::Propagate,
            ),
            (StatusCode::Internal, ChunkFailureReaction::Propagate),
            (StatusCode::RegionNotReady, ChunkFailureReaction::Propagate),
            (StatusCode::TableUnavailable, ChunkFailureReaction::Propagate),
            (StatusCode::RegionBusy, ChunkFailureReaction::Propagate),
            (
                StatusCode::RuntimeResourcesExhausted,
                ChunkFailureReaction::Propagate,
            ),
        ];
        for (code, expected) in cases {
            assert_eq!(
                Instance::classify_trace_chunk_failure(code),
                expected,
                "status code: {code:?}"
            );
        }
    }

    #[test]
    fn test_classify_trace_span_failure() {
        // Fatal codes propagate; client-side codes do not.
        for code in [StatusCode::DeadlineExceeded, StatusCode::StorageUnavailable] {
            assert!(Instance::should_propagate_trace_span_failure(code));
        }
        assert!(!Instance::should_propagate_trace_span_failure(
            StatusCode::InvalidArguments
        ));
    }

    #[test]
    fn test_add_trace_write_cost() {
        // Costs from successive writes accumulate on the same outcome.
        let mut outcome = TraceIngestOutcome::default();
        for cost in [3, 5] {
            Instance::add_trace_write_cost(&mut outcome, cost);
        }
        assert_eq!(outcome.write_cost, 8);
    }

    #[test]
    fn test_finish_trace_failure_message() {
        let details = vec!["Rejected span trace:span (InvalidArguments)".to_string()];
        let message =
            Instance::finish_trace_failure_message(3, 2, details).expect("rejections yield text");
        assert!(message.contains("Accepted 3 spans, rejected 2 spans"));
        assert!(message.contains("Rejected span trace:span"));

        // No rejections -> no message at all.
        assert_eq!(Instance::finish_trace_failure_message(2, 0, vec![]), None);
    }

    #[test]
    fn test_finish_trace_failure_message_without_detail_messages() {
        // Rejections without per-span details still produce the summary line.
        let message = Instance::finish_trace_failure_message(0, 2, vec![]);
        assert_eq!(
            message,
            Some("Accepted 0 spans, rejected 2 spans".to_string())
        );
    }

    #[test]
    fn test_push_trace_failure_message_increments_labeled_counter() {
        let label = "retry_per_span_counter_test";
        let counter = OTLP_TRACES_FAILURE_COUNT.with_label_values(&[label]);
        let initial = counter.get();
        let mut messages = Vec::new();

        Instance::push_trace_failure_message(
            &mut messages,
            label,
            "Chunk fallback triggered by InvalidArguments".to_string(),
        );

        // Exactly one message recorded and the labeled counter bumped once.
        assert_eq!(messages.len(), 1);
        assert_eq!(counter.get(), initial + 1);
    }

    #[test]
    fn test_push_trace_failure_message_caps_recorded_messages() {
        let label = "retry_per_span_limit_test";
        let mut messages = Vec::new();

        // Push one more message than the cap retains.
        for idx in 0..5 {
            Instance::push_trace_failure_message(&mut messages, label, format!("failure-{idx}"));
        }

        // Only the first four messages are kept.
        assert_eq!(messages.len(), 4);
        let expected: Vec<String> = (0..4).map(|idx| format!("failure-{idx}")).collect();
        assert_eq!(messages, expected);
    }

    #[test]
    fn test_classify_trace_chunk_failure_defaults_to_discard() {
        // Unmapped status codes fall back to discarding the chunk.
        assert_eq!(
            Instance::classify_trace_chunk_failure(StatusCode::Unknown),
            ChunkFailureReaction::DiscardChunk
        );
    }
}