mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-04 22:30:37 +00:00
feat: table semantic layer identity (Phase 1) (#8210)
* feat: table semantic layer identity (Phase 1)

Attach a thin layer of semantic metadata to ingested tables via the existing `table_options` slot, so machine consumers (LLM agents, alert/dashboard builders, MCP servers, ETL) can align a table with the observability concept it stands for without guessing from column names. See docs/rfcs/2026-05-28-table-semantic-layer.md.

Phase 1 (identity) only:

- New `table::requests::semantic` module: the `greptime.semantic.*` vocabulary (signal/source/source_version/pipeline + trace/metric/log/resource-scope keys, defined now, populated by later phases), value constants, the internal `greptime.internal.semantic.per_table_index` transport key (reserved for Phase 2, deliberately outside the public namespace), and `is_semantic_option_key`.
- `validate_table_option` accepts the `greptime.semantic.*` prefix, so the keys are valid both on the auto-create path and on explicit `CREATE TABLE ... WITH (...)`.
- `fill_table_options_for_create` copies every semantic ctx extension into the new table's options (prefix passthrough alongside the fixed allowlist).
- Frontend stamps identity on the context at each ingest entry: OTLP metrics (metric/opentelemetry), traces (+pipeline, has_events/has_links/conventions for the v1 model), logs (log/opentelemetry), and Prometheus remote write (metric/prometheus, metadata_quality=inferred). OTLP metric metadata_quality is left for Phase 2 (declared).
- Trace identity is stamped only on the main span table; the derived `_services` / `_operations` lookup tables keep the unstamped context and carry no semantic identity (cross-table relationships are out of scope).

Semantic options appear in SHOW CREATE TABLE (like table_data_model / otlp_metric_compat) and in information_schema, so an LLM inspecting a table sees its semantics directly.

Tests: unit (validation prefix + internal-key rejection, ctx passthrough) and integration assertions that the common keys land for OTLP metrics (metric-engine logical table), traces, logs, and Prometheus remote write; SHOW CREATE goldens updated.

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: prom batcher not cover and white list for semantic keys/values

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: typo

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
@@ -40,7 +40,7 @@ use snafu::{ResultExt, ensure};
|
||||
use sqlparser::dialect::Dialect;
|
||||
use sqlparser::keywords::Keyword;
|
||||
use sqlparser::parser::Parser;
|
||||
use table::requests::validate_table_option;
|
||||
use table::requests::{SEMANTIC_PREFIX, validate_semantic_option, validate_table_option};
|
||||
|
||||
use crate::error::{
|
||||
ConvertToLogicalExpressionSnafu, InvalidSqlSnafu, InvalidTableOptionSnafu, ParseSqlValueSnafu,
|
||||
@@ -395,8 +395,18 @@ pub fn parse_with_options(parser: &mut Parser) -> Result<OptionMap> {
|
||||
.into_iter()
|
||||
.map(parse_option_string)
|
||||
.collect::<Result<HashMap<String, OptionValue>>>()?;
|
||||
for key in options.keys() {
|
||||
ensure!(validate_table_option(key), InvalidTableOptionSnafu { key });
|
||||
for (key, value) in &options {
|
||||
if key.starts_with(SEMANTIC_PREFIX) {
|
||||
// Semantic keys are whitelisted and value-checked against their domain,
|
||||
// so a user cannot set an unknown key or an out-of-range value.
|
||||
let value = value.as_string().unwrap_or_default();
|
||||
ensure!(
|
||||
validate_semantic_option(key, value),
|
||||
InvalidTableOptionSnafu { key }
|
||||
);
|
||||
} else {
|
||||
ensure!(validate_table_option(key), InvalidTableOptionSnafu { key });
|
||||
}
|
||||
}
|
||||
Ok(OptionMap::new(options))
|
||||
}
|
||||
|
||||
@@ -868,7 +868,25 @@ ENGINE=mito
|
||||
";
|
||||
let result =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default());
|
||||
assert_matches!(result, Err(Error::InvalidTableOption { .. }))
|
||||
assert_matches!(result, Err(Error::InvalidTableOption { .. }));
|
||||
|
||||
// A whitelisted semantic key with an in-domain value is accepted.
|
||||
let semantic = |with: &str| {
|
||||
let sql =
|
||||
format!("create table demo(host string, ts timestamp time index) with({with});");
|
||||
ParserContext::create_with_dialect(&sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
};
|
||||
assert!(semantic("'greptime.semantic.signal_type'='metric'").is_ok());
|
||||
// An out-of-domain value is rejected.
|
||||
assert_matches!(
|
||||
semantic("'greptime.semantic.signal_type'='spans'"),
|
||||
Err(Error::InvalidTableOption { .. })
|
||||
);
|
||||
// An unknown key under the semantic prefix is rejected.
|
||||
assert_matches!(
|
||||
semantic("'greptime.semantic.bogus'='x'"),
|
||||
Err(Error::InvalidTableOption { .. })
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user