Skip to main content

table/requests/
semantic.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
//! Table semantic layer vocabulary.
//!
//! A thin layer of semantic metadata attached to a table via `table_options`, so
//! machine consumers (LLM agents, alert/dashboard builders, MCP servers, ETL) can
//! align a table with the observability concept it stands for without guessing
//! from column names. See `docs/rfcs/2026-05-28-table-semantic-layer.md`.
//!
//! All public table-option keys share the [`SEMANTIC_PREFIX`] namespace and are
//! string-valued. [`is_semantic_option_key`] gates them through
//! [`crate::requests::validate_table_option`], so they are accepted both on the
//! ingestion auto-create path and on explicit `CREATE TABLE ... WITH (...)` DDL.
26
/// Reserved namespace prefix shared by every public semantic table-option key.
pub const SEMANTIC_PREFIX: &str = "greptime.semantic.";

/// Internal `QueryContext` extension key carrying the per-table semantic index
/// (a `{table_name -> {semantic_key: value}}` JSON blob) from the ingestion
/// encode path to the auto-create site.
///
/// Deliberately OUTSIDE [`SEMANTIC_PREFIX`] so it is not a valid table option
/// and never leaks into a table's options.
pub const SEMANTIC_PER_TABLE_INDEX_KEY: &str = "greptime.internal.semantic.per_table_index";
35
// ---- Common keys (all signals) ----

/// Signal kind: one of [`SIGNAL_TYPE_TRACE`] / [`SIGNAL_TYPE_LOG`] /
/// [`SIGNAL_TYPE_METRIC`] / [`SIGNAL_TYPE_EVENT`].
pub const SEMANTIC_SIGNAL_TYPE: &str = "greptime.semantic.signal_type";
/// Ingestion ecosystem, e.g. [`SOURCE_OPENTELEMETRY`] / [`SOURCE_PROMETHEUS`].
pub const SEMANTIC_SOURCE: &str = "greptime.semantic.source";
/// Optional protocol or SDK version string, e.g. `v2` (Prom remote write), `1.30.0`.
pub const SEMANTIC_SOURCE_VERSION: &str = "greptime.semantic.source_version";
/// Internal ingestion pipeline / data model, e.g. `greptime_trace_v1`.
pub const SEMANTIC_PIPELINE: &str = "greptime.semantic.pipeline";

// ---- Trace keys ----

/// Semantic-conventions version the rows conform to (e.g. `otel-semconv-1.27`),
/// or [`SEMANTIC_VALUE_UNKNOWN`] / [`SEMANTIC_VALUE_MIXED`] when not single-valued.
pub const SEMANTIC_TRACE_CONVENTIONS: &str = "greptime.semantic.trace.conventions";
/// Whether `span_events` are preserved on the table.
pub const SEMANTIC_TRACE_HAS_EVENTS: &str = "greptime.semantic.trace.has_events";
/// Whether `span_links` are preserved on the table.
pub const SEMANTIC_TRACE_HAS_LINKS: &str = "greptime.semantic.trace.has_links";

// ---- Metric keys (populated in Phase 2) ----

/// Instrument kind: `counter` / `gauge` / `histogram` / `summary` /
/// `updown_counter` / `gauge_histogram` / `info` / `stateset`.
pub const SEMANTIC_METRIC_TYPE: &str = "greptime.semantic.metric.type";
/// UCUM unit, e.g. `s`, `By`, `{request}`.
pub const SEMANTIC_METRIC_UNIT: &str = "greptime.semantic.metric.unit";
/// `cumulative` / `delta` (OTel only).
pub const SEMANTIC_METRIC_TEMPORALITY: &str = "greptime.semantic.metric.temporality";
/// `true` / `false` for sum / counter typed data.
pub const SEMANTIC_METRIC_MONOTONIC: &str = "greptime.semantic.metric.monotonic";
/// [`METADATA_QUALITY_DECLARED`] when the protocol stated the type, or
/// [`METADATA_QUALITY_INFERRED`] when guessed from a name suffix.
pub const SEMANTIC_METRIC_METADATA_QUALITY: &str = "greptime.semantic.metric.metadata_quality";
/// Pre-translation OTel metric name when the table name was Prometheus-ised.
pub const SEMANTIC_METRIC_ORIGINAL_NAME: &str = "greptime.semantic.metric.original_name";

// ---- Log keys (populated in Phase 3) ----

/// `otlp` / `syslog` / `custom` — which mapping to use for `severity_number`.
pub const SEMANTIC_LOG_SEVERITY_SCHEME: &str = "greptime.semantic.log.severity_scheme";
/// `string` / `json` / `mixed` — how to parse `body`.
pub const SEMANTIC_LOG_BODY_FORMAT: &str = "greptime.semantic.log.body_format";

// ---- Resource / scope preservation keys (populated in Phase 3) ----

/// JSON array string of resource attributes promoted to first-class columns.
pub const SEMANTIC_RESOURCE_ATTRIBUTES_PRESERVED: &str =
    "greptime.semantic.resource.attributes_preserved";
/// `true` / `false` — whether any resource attribute was dropped at ingest.
pub const SEMANTIC_RESOURCE_ATTRIBUTES_DROPPED: &str =
    "greptime.semantic.resource.attributes_dropped";
/// `true` / `false` — whether `scope.name` / `scope.version` survive on the row.
pub const SEMANTIC_SCOPE_PRESERVED: &str = "greptime.semantic.scope.preserved";
92
// ---- Value constants ----

/// [`SEMANTIC_SIGNAL_TYPE`] value for trace tables.
pub const SIGNAL_TYPE_TRACE: &str = "trace";
/// [`SEMANTIC_SIGNAL_TYPE`] value for log tables.
pub const SIGNAL_TYPE_LOG: &str = "log";
/// [`SEMANTIC_SIGNAL_TYPE`] value for metric tables.
pub const SIGNAL_TYPE_METRIC: &str = "metric";
/// [`SEMANTIC_SIGNAL_TYPE`] value for event tables.
pub const SIGNAL_TYPE_EVENT: &str = "event";

/// [`SEMANTIC_SOURCE`] value for the OpenTelemetry ingestion ecosystem.
pub const SOURCE_OPENTELEMETRY: &str = "opentelemetry";
/// [`SEMANTIC_SOURCE`] value for the Prometheus ingestion ecosystem.
pub const SOURCE_PROMETHEUS: &str = "prometheus";

/// [`SEMANTIC_METRIC_METADATA_QUALITY`] value: the protocol stated the metric type.
pub const METADATA_QUALITY_DECLARED: &str = "declared";
/// [`SEMANTIC_METRIC_METADATA_QUALITY`] value: the type was guessed from a name suffix.
pub const METADATA_QUALITY_INFERRED: &str = "inferred";

/// Sentinel for a key that cannot be determined at stamp time.
pub const SEMANTIC_VALUE_UNKNOWN: &str = "unknown";
/// Sentinel for a single-valued key that saw conflicting sources.
pub const SEMANTIC_VALUE_MIXED: &str = "mixed";
110
111/// Every recognised public semantic table-option key. The set is a closed
112/// whitelist: keys under [`SEMANTIC_PREFIX`] that are not listed here are rejected,
113/// so an unknown key like `greptime.semantic.unknown_key` does not silently land
114/// in a table's options. Adding a key to the vocabulary means adding it here.
115pub const SEMANTIC_OPTION_KEYS: &[&str] = &[
116    SEMANTIC_SIGNAL_TYPE,
117    SEMANTIC_SOURCE,
118    SEMANTIC_SOURCE_VERSION,
119    SEMANTIC_PIPELINE,
120    SEMANTIC_TRACE_CONVENTIONS,
121    SEMANTIC_TRACE_HAS_EVENTS,
122    SEMANTIC_TRACE_HAS_LINKS,
123    SEMANTIC_METRIC_TYPE,
124    SEMANTIC_METRIC_UNIT,
125    SEMANTIC_METRIC_TEMPORALITY,
126    SEMANTIC_METRIC_MONOTONIC,
127    SEMANTIC_METRIC_METADATA_QUALITY,
128    SEMANTIC_METRIC_ORIGINAL_NAME,
129    SEMANTIC_LOG_SEVERITY_SCHEME,
130    SEMANTIC_LOG_BODY_FORMAT,
131    SEMANTIC_RESOURCE_ATTRIBUTES_PRESERVED,
132    SEMANTIC_RESOURCE_ATTRIBUTES_DROPPED,
133    SEMANTIC_SCOPE_PRESERVED,
134];
135
136/// Returns true if `key` is a recognised semantic table-option key (whitelist).
137///
138/// Note this is membership, not a prefix test: unknown keys under
139/// [`SEMANTIC_PREFIX`] are rejected, and the internal
140/// [`SEMANTIC_PER_TABLE_INDEX_KEY`] (outside the prefix) never matches.
141pub fn is_semantic_option_key(key: &str) -> bool {
142    SEMANTIC_OPTION_KEYS.contains(&key)
143}
144
145/// Validates a `greptime.semantic.*` option's `value` against its allowed domain.
146///
147/// Open-value keys (unit, original_name, version, pipeline, conventions, the
148/// preserved-attributes list) accept any non-empty string. Closed-domain keys
149/// accept a fixed set, plus the `unknown` sentinel, plus `mixed` for the keys
150/// where one long-lived table can legitimately see multiple values. Keys not in
151/// [`SEMANTIC_OPTION_KEYS`] are rejected.
152pub fn validate_semantic_option(key: &str, value: &str) -> bool {
153    match key {
154        SEMANTIC_SOURCE_VERSION
155        | SEMANTIC_PIPELINE
156        | SEMANTIC_METRIC_UNIT
157        | SEMANTIC_METRIC_ORIGINAL_NAME
158        | SEMANTIC_TRACE_CONVENTIONS
159        | SEMANTIC_RESOURCE_ATTRIBUTES_PRESERVED => !value.is_empty(),
160
161        SEMANTIC_SIGNAL_TYPE => matches!(value, "trace" | "log" | "metric" | "event" | "unknown"),
162        SEMANTIC_SOURCE => matches!(
163            value,
164            "opentelemetry"
165                | "prometheus"
166                | "elasticsearch"
167                | "loki"
168                | "custom"
169                | "mixed"
170                | "unknown"
171        ),
172        SEMANTIC_METRIC_TYPE => matches!(
173            value,
174            "counter"
175                | "gauge"
176                | "histogram"
177                | "summary"
178                | "updown_counter"
179                | "gauge_histogram"
180                | "info"
181                | "stateset"
182                | "mixed"
183                | "unknown"
184        ),
185        SEMANTIC_METRIC_TEMPORALITY => {
186            matches!(value, "cumulative" | "delta" | "mixed" | "unknown")
187        }
188        SEMANTIC_METRIC_MONOTONIC
189        | SEMANTIC_TRACE_HAS_EVENTS
190        | SEMANTIC_TRACE_HAS_LINKS
191        | SEMANTIC_RESOURCE_ATTRIBUTES_DROPPED
192        | SEMANTIC_SCOPE_PRESERVED => matches!(value, "true" | "false" | "unknown"),
193        SEMANTIC_METRIC_METADATA_QUALITY => matches!(value, "declared" | "inferred" | "unknown"),
194        SEMANTIC_LOG_SEVERITY_SCHEME => matches!(value, "otlp" | "syslog" | "custom" | "unknown"),
195        SEMANTIC_LOG_BODY_FORMAT => matches!(value, "string" | "json" | "mixed" | "unknown"),
196
197        _ => false,
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_semantic_option_key() {
        assert!(is_semantic_option_key(SEMANTIC_SIGNAL_TYPE));
        assert!(is_semantic_option_key(SEMANTIC_METRIC_TYPE));

        // Unknown keys under the prefix are not whitelisted.
        assert!(!is_semantic_option_key("greptime.semantic.future.key"));
        assert!(!is_semantic_option_key("greptime.semantic.unknown_key"));
        // Near-misses must not match.
        assert!(!is_semantic_option_key("greptime.semanticx"));
        assert!(!is_semantic_option_key("semantic.signal_type"));
        assert!(!is_semantic_option_key("table_data_model"));
        // The internal transport key must never be treated as a table option.
        assert!(!is_semantic_option_key(SEMANTIC_PER_TABLE_INDEX_KEY));
    }

    #[test]
    fn test_semantic_keys_stay_in_namespace() {
        // Every public key lives under the reserved prefix ...
        for key in SEMANTIC_OPTION_KEYS {
            assert!(
                key.starts_with(SEMANTIC_PREFIX),
                "{key} is outside the semantic namespace"
            );
        }
        // ... and the internal transport key deliberately does not.
        assert!(!SEMANTIC_PER_TABLE_INDEX_KEY.starts_with(SEMANTIC_PREFIX));
    }

    #[test]
    fn test_validate_semantic_option() {
        // Enum keys reject out-of-domain values.
        assert!(validate_semantic_option(SEMANTIC_SIGNAL_TYPE, "metric"));
        assert!(!validate_semantic_option(SEMANTIC_SIGNAL_TYPE, "spans"));
        assert!(validate_semantic_option(SEMANTIC_METRIC_TYPE, "counter"));
        assert!(validate_semantic_option(SEMANTIC_METRIC_TYPE, "mixed"));
        assert!(!validate_semantic_option(SEMANTIC_METRIC_TYPE, "bogus"));

        // Booleans, sentinels, open values.
        assert!(validate_semantic_option(SEMANTIC_TRACE_HAS_EVENTS, "true"));
        assert!(!validate_semantic_option(SEMANTIC_TRACE_HAS_EVENTS, "yes"));
        assert!(validate_semantic_option(
            SEMANTIC_METRIC_TEMPORALITY,
            "unknown"
        ));
        assert!(validate_semantic_option(SEMANTIC_METRIC_UNIT, "By"));
        assert!(!validate_semantic_option(SEMANTIC_METRIC_UNIT, ""));

        // Unknown key is rejected regardless of value.
        assert!(!validate_semantic_option(
            "greptime.semantic.future.key",
            "x"
        ));

        // Drift guard: every named value constant must validate for its key.
        for signal in [
            SIGNAL_TYPE_TRACE,
            SIGNAL_TYPE_LOG,
            SIGNAL_TYPE_METRIC,
            SIGNAL_TYPE_EVENT,
        ] {
            assert!(
                validate_semantic_option(SEMANTIC_SIGNAL_TYPE, signal),
                "signal type {signal} should validate"
            );
        }
        for source in [SOURCE_OPENTELEMETRY, SOURCE_PROMETHEUS] {
            assert!(
                validate_semantic_option(SEMANTIC_SOURCE, source),
                "source {source} should validate"
            );
        }
        for quality in [METADATA_QUALITY_DECLARED, METADATA_QUALITY_INFERRED] {
            assert!(
                validate_semantic_option(SEMANTIC_METRIC_METADATA_QUALITY, quality),
                "metadata quality {quality} should validate"
            );
        }
        assert!(validate_semantic_option(
            SEMANTIC_TRACE_CONVENTIONS,
            SEMANTIC_VALUE_UNKNOWN
        ));
        assert!(validate_semantic_option(
            SEMANTIC_TRACE_CONVENTIONS,
            SEMANTIC_VALUE_MIXED
        ));

        // An empty value never validates, for any whitelisted key.
        for key in SEMANTIC_OPTION_KEYS {
            assert!(
                !validate_semantic_option(key, ""),
                "empty value should never validate for {key}"
            );
        }
    }

    #[test]
    fn test_whitelist_and_validator_agree() {
        // Every whitelisted key must have a validator arm. The `unknown`
        // sentinel is accepted by every domain, so a key that was added to
        // `SEMANTIC_OPTION_KEYS` but forgotten in `validate_semantic_option`
        // fails here instead of being silently rejected at runtime.
        for key in SEMANTIC_OPTION_KEYS {
            assert!(is_semantic_option_key(key));
            assert!(
                validate_semantic_option(key, SEMANTIC_VALUE_UNKNOWN),
                "{key} is whitelisted but has no validator arm"
            );
        }
    }
}
280}