diff --git a/src/catalog/src/information_schema/columns.rs b/src/catalog/src/information_schema/columns.rs index 0babffb63f..760dd75fb2 100644 --- a/src/catalog/src/information_schema/columns.rs +++ b/src/catalog/src/information_schema/columns.rs @@ -15,6 +15,9 @@ use std::sync::Arc; use arrow_schema::SchemaRef as ArrowSchemaRef; +use common_catalog::consts::{ + SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX, +}; use common_query::physical_plan::TaskContext; use common_recordbatch::RecordBatch; use datafusion::datasource::streaming::PartitionStream as DfPartitionStream; @@ -40,6 +43,7 @@ const TABLE_SCHEMA: &str = "table_schema"; const TABLE_NAME: &str = "table_name"; const COLUMN_NAME: &str = "column_name"; const DATA_TYPE: &str = "data_type"; +const SEMANTIC_TYPE: &str = "semantic_type"; impl InformationSchemaColumns { pub(super) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self { @@ -49,6 +53,7 @@ impl InformationSchemaColumns { ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false), ])); Self { schema, @@ -76,6 +81,7 @@ struct InformationSchemaColumnsBuilder { table_names: StringVectorBuilder, column_names: StringVectorBuilder, data_types: StringVectorBuilder, + semantic_types: StringVectorBuilder, } impl InformationSchemaColumnsBuilder { @@ -89,6 +95,7 @@ impl InformationSchemaColumnsBuilder { table_names: StringVectorBuilder::with_capacity(42), column_names: StringVectorBuilder::with_capacity(42), data_types: StringVectorBuilder::with_capacity(42), + semantic_types: StringVectorBuilder::with_capacity(42), } } @@ -100,14 +107,23 @@ impl InformationSchemaColumnsBuilder { let Some(schema) = self.catalog_provider.schema(&schema_name).await? else { continue }; for table_name in schema.table_names().await? { let Some(table) = schema.table(&table_name).await? else { continue }; + let keys = &table.table_info().meta.primary_key_indices; let schema = table.schema(); - for column in schema.column_schemas() { + for (idx, column) in schema.column_schemas().iter().enumerate() { + let semantic_type = if column.is_time_index() { + SEMANTIC_TYPE_TIME_INDEX + } else if keys.contains(&idx) { + SEMANTIC_TYPE_PRIMARY_KEY + } else { + SEMANTIC_TYPE_FIELD + }; self.add_column( &catalog_name, &schema_name, &table_name, &column.name, column.data_type.name(), + semantic_type, ); } } @@ -123,12 +139,14 @@ impl InformationSchemaColumnsBuilder { table_name: &str, column_name: &str, data_type: &str, + semantic_type: &str, ) { self.catalog_names.push(Some(catalog_name)); self.schema_names.push(Some(schema_name)); self.table_names.push(Some(table_name)); self.column_names.push(Some(column_name)); self.data_types.push(Some(data_type)); + self.semantic_types.push(Some(semantic_type)); } fn finish(&mut self) -> Result { @@ -138,6 +156,7 @@ impl InformationSchemaColumnsBuilder { Arc::new(self.table_names.finish()), Arc::new(self.column_names.finish()), Arc::new(self.data_types.finish()), + Arc::new(self.semantic_types.finish()), ]; RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu) } diff --git a/src/common/catalog/src/consts.rs b/src/common/catalog/src/consts.rs index 84a56b8449..3130770c27 100644 --- a/src/common/catalog/src/consts.rs +++ b/src/common/catalog/src/consts.rs @@ -30,3 +30,7 @@ pub const SCRIPTS_TABLE_ID: u32 = 1; pub const MITO_ENGINE: &str = "mito"; pub const IMMUTABLE_FILE_ENGINE: &str = "file"; + +pub const SEMANTIC_TYPE_PRIMARY_KEY: &str = "PRIMARY KEY"; +pub const SEMANTIC_TYPE_FIELD: &str = "FIELD"; +pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIME INDEX"; diff --git a/src/frontend/src/tests/instance_test.rs b/src/frontend/src/tests/instance_test.rs index 17c59666d1..0e97d48e10 100644 --- a/src/frontend/src/tests/instance_test.rs +++ b/src/frontend/src/tests/instance_test.rs @@ -1304,32 +1304,32 @@ async fn test_information_schema_dot_columns(instance: Arc) { // User can only see information schema under current catalog. // A necessary requirement to GreptimeCloud. - let sql = "select table_catalog, table_schema, table_name, column_name, data_type from information_schema.columns order by table_name"; + let sql = "select table_catalog, table_schema, table_name, column_name, data_type, semantic_type from information_schema.columns order by table_name"; let output = execute_sql(&instance, sql).await; let expected = "\ -+---------------+--------------+------------+--------------+----------------------+ -| table_catalog | table_schema | table_name | column_name | data_type | -+---------------+--------------+------------+--------------+----------------------+ -| greptime | public | numbers | number | UInt32 | -| greptime | public | scripts | schema | String | -| greptime | public | scripts | name | String | -| greptime | public | scripts | script | String | -| greptime | public | scripts | engine | String | -| greptime | public | scripts | timestamp | TimestampMillisecond | -| greptime | public | scripts | gmt_created | TimestampMillisecond | -| greptime | public | scripts | gmt_modified | TimestampMillisecond | -+---------------+--------------+------------+--------------+----------------------+"; ++---------------+--------------+------------+--------------+----------------------+---------------+ +| table_catalog | table_schema | table_name | column_name | data_type | semantic_type | ++---------------+--------------+------------+--------------+----------------------+---------------+ +| greptime | public | numbers | number | UInt32 | PRIMARY KEY | +| greptime | public | scripts | schema | String | PRIMARY KEY | +| greptime | public | scripts | name | String | PRIMARY KEY | +| greptime | public | scripts | script | String | FIELD | +| greptime | public | scripts | engine | String | FIELD | +| greptime | public | scripts | timestamp | TimestampMillisecond | TIME INDEX | +| greptime | public | scripts | gmt_created | TimestampMillisecond | FIELD | +| greptime | public | scripts | gmt_modified | TimestampMillisecond | FIELD | ++---------------+--------------+------------+--------------+----------------------+---------------+"; check_output_stream(output, expected).await; let output = execute_sql_with(&instance, sql, query_ctx).await; let expected = "\ -+-----------------+----------------+---------------+-------------+-----------+ -| table_catalog | table_schema | table_name | column_name | data_type | -+-----------------+----------------+---------------+-------------+-----------+ -| another_catalog | another_schema | another_table | i | Int64 | -+-----------------+----------------+---------------+-------------+-----------+"; ++-----------------+----------------+---------------+-------------+-----------+---------------+ +| table_catalog | table_schema | table_name | column_name | data_type | semantic_type | ++-----------------+----------------+---------------+-------------+-----------+---------------+ +| another_catalog | another_schema | another_table | i | Int64 | TIME INDEX | ++-----------------+----------------+---------------+-------------+-----------+---------------+"; check_output_stream(output, expected).await; } diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 1705a37202..17f3926426 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -18,7 +18,9 @@ use std::collections::HashMap; use std::sync::Arc; use catalog::CatalogManagerRef; -use common_catalog::consts::DEFAULT_CATALOG_NAME; +use common_catalog::consts::{ + DEFAULT_CATALOG_NAME, SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX, +}; use common_datasource::file_format::{infer_schemas, FileFormat, Format}; use common_datasource::lister::{Lister, Source}; use common_datasource::object_store::build_backend; @@ -50,10 +52,6 @@ const COLUMN_NULLABLE_COLUMN: &str = "Null"; const COLUMN_DEFAULT_COLUMN: &str = "Default"; const COLUMN_SEMANTIC_TYPE_COLUMN: &str = "Semantic Type"; -const SEMANTIC_TYPE_PRIMARY_KEY: &str = "PRIMARY KEY"; -const SEMANTIC_TYPE_FIELD: &str = "FIELD"; -const SEMANTIC_TYPE_TIME_INDEX: &str = "TIME INDEX"; - const NULLABLE_YES: &str = "YES"; const NULLABLE_NO: &str = "NO"; diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index c7b9ab4e1a..de07ebe37c 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -39,17 +39,17 @@ order by table_schema, table_name; | greptime | my_db | foo | BASE TABLE | mito | +---------------+--------------+------------+------------+--------+ -select table_catalog, table_schema, table_name, column_name, data_type +select table_catalog, table_schema, table_name, column_name, data_type, semantic_type from information_schema.columns where table_catalog = 'greptime' and table_schema != 'public' order by table_schema, table_name; -+---------------+--------------+------------+-------------+-----------+ -| table_catalog | table_schema | table_name | column_name | data_type | -+---------------+--------------+------------+-------------+-----------+ -| greptime | my_db | foo | ts | Int64 | -+---------------+--------------+------------+-------------+-----------+ ++---------------+--------------+------------+-------------+-----------+---------------+ +| table_catalog | table_schema | table_name | column_name | data_type | semantic_type | ++---------------+--------------+------------+-------------+-----------+---------------+ +| greptime | my_db | foo | ts | Int64 | TIME INDEX | ++---------------+--------------+------------+-------------+-----------+---------------+ use public; diff --git a/tests/cases/standalone/common/system/information_schema.sql b/tests/cases/standalone/common/system/information_schema.sql index b33a72075f..c033cad8b6 100644 --- a/tests/cases/standalone/common/system/information_schema.sql +++ b/tests/cases/standalone/common/system/information_schema.sql @@ -20,7 +20,7 @@ where table_catalog = 'greptime' and table_schema != 'public' order by table_schema, table_name; -select table_catalog, table_schema, table_name, column_name, data_type +select table_catalog, table_schema, table_name, column_name, data_type, semantic_type from information_schema.columns where table_catalog = 'greptime' and table_schema != 'public'