mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-07 05:42:57 +00:00
feat: add semantic_type to information_schema.columns (#1530)
This commit is contained in:
@@ -15,6 +15,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::{
|
||||
SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
|
||||
};
|
||||
use common_query::physical_plan::TaskContext;
|
||||
use common_recordbatch::RecordBatch;
|
||||
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
|
||||
@@ -40,6 +43,7 @@ const TABLE_SCHEMA: &str = "table_schema";
|
||||
/// Name of the string column in this table's schema that holds the table name of each listed column.
const TABLE_NAME: &str = "table_name";
|
||||
/// Name of the string column in this table's schema that holds the name of each listed column.
const COLUMN_NAME: &str = "column_name";
|
||||
/// Name of the string column in this table's schema that holds the data type name of each listed column.
const DATA_TYPE: &str = "data_type";
|
||||
/// Name of the string column in this table's schema that holds the semantic type label
/// (time index, primary key, or field) of each listed column.
const SEMANTIC_TYPE: &str = "semantic_type";
|
||||
|
||||
impl InformationSchemaColumns {
|
||||
pub(super) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
|
||||
@@ -49,6 +53,7 @@ impl InformationSchemaColumns {
|
||||
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
|
||||
]));
|
||||
Self {
|
||||
schema,
|
||||
@@ -76,6 +81,7 @@ struct InformationSchemaColumnsBuilder {
|
||||
table_names: StringVectorBuilder,
|
||||
column_names: StringVectorBuilder,
|
||||
data_types: StringVectorBuilder,
|
||||
semantic_types: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaColumnsBuilder {
|
||||
@@ -89,6 +95,7 @@ impl InformationSchemaColumnsBuilder {
|
||||
table_names: StringVectorBuilder::with_capacity(42),
|
||||
column_names: StringVectorBuilder::with_capacity(42),
|
||||
data_types: StringVectorBuilder::with_capacity(42),
|
||||
semantic_types: StringVectorBuilder::with_capacity(42),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,14 +107,23 @@ impl InformationSchemaColumnsBuilder {
|
||||
let Some(schema) = self.catalog_provider.schema(&schema_name).await? else { continue };
|
||||
for table_name in schema.table_names().await? {
|
||||
let Some(table) = schema.table(&table_name).await? else { continue };
|
||||
let keys = &table.table_info().meta.primary_key_indices;
|
||||
let schema = table.schema();
|
||||
for column in schema.column_schemas() {
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
let semantic_type = if column.is_time_index() {
|
||||
SEMANTIC_TYPE_TIME_INDEX
|
||||
} else if keys.contains(&idx) {
|
||||
SEMANTIC_TYPE_PRIMARY_KEY
|
||||
} else {
|
||||
SEMANTIC_TYPE_FIELD
|
||||
};
|
||||
self.add_column(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
&column.name,
|
||||
column.data_type.name(),
|
||||
semantic_type,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -123,12 +139,14 @@ impl InformationSchemaColumnsBuilder {
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
data_type: &str,
|
||||
semantic_type: &str,
|
||||
) {
|
||||
self.catalog_names.push(Some(catalog_name));
|
||||
self.schema_names.push(Some(schema_name));
|
||||
self.table_names.push(Some(table_name));
|
||||
self.column_names.push(Some(column_name));
|
||||
self.data_types.push(Some(data_type));
|
||||
self.semantic_types.push(Some(semantic_type));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
@@ -138,6 +156,7 @@ impl InformationSchemaColumnsBuilder {
|
||||
Arc::new(self.table_names.finish()),
|
||||
Arc::new(self.column_names.finish()),
|
||||
Arc::new(self.data_types.finish()),
|
||||
Arc::new(self.semantic_types.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
|
||||
@@ -30,3 +30,7 @@ pub const SCRIPTS_TABLE_ID: u32 = 1;
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
pub const IMMUTABLE_FILE_ENGINE: &str = "file";
|
||||
|
||||
/// Semantic type label for a column that belongs to a table's primary key
/// (and is not the time index).
pub const SEMANTIC_TYPE_PRIMARY_KEY: &str = "PRIMARY KEY";
|
||||
/// Semantic type label for a column that is neither the time index nor part of the primary key.
pub const SEMANTIC_TYPE_FIELD: &str = "FIELD";
|
||||
/// Semantic type label for a table's time index column.
pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIME INDEX";
|
||||
|
||||
@@ -1304,32 +1304,32 @@ async fn test_information_schema_dot_columns(instance: Arc<dyn MockInstance>) {
|
||||
|
||||
// A user can only see the information schema of the current catalog.
|
||||
// This isolation is a necessary requirement for GreptimeCloud.
|
||||
let sql = "select table_catalog, table_schema, table_name, column_name, data_type from information_schema.columns order by table_name";
|
||||
let sql = "select table_catalog, table_schema, table_name, column_name, data_type, semantic_type from information_schema.columns order by table_name";
|
||||
|
||||
let output = execute_sql(&instance, sql).await;
|
||||
let expected = "\
|
||||
+---------------+--------------+------------+--------------+----------------------+
|
||||
| table_catalog | table_schema | table_name | column_name | data_type |
|
||||
+---------------+--------------+------------+--------------+----------------------+
|
||||
| greptime | public | numbers | number | UInt32 |
|
||||
| greptime | public | scripts | schema | String |
|
||||
| greptime | public | scripts | name | String |
|
||||
| greptime | public | scripts | script | String |
|
||||
| greptime | public | scripts | engine | String |
|
||||
| greptime | public | scripts | timestamp | TimestampMillisecond |
|
||||
| greptime | public | scripts | gmt_created | TimestampMillisecond |
|
||||
| greptime | public | scripts | gmt_modified | TimestampMillisecond |
|
||||
+---------------+--------------+------------+--------------+----------------------+";
|
||||
+---------------+--------------+------------+--------------+----------------------+---------------+
|
||||
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
|
||||
+---------------+--------------+------------+--------------+----------------------+---------------+
|
||||
| greptime | public | numbers | number | UInt32 | PRIMARY KEY |
|
||||
| greptime | public | scripts | schema | String | PRIMARY KEY |
|
||||
| greptime | public | scripts | name | String | PRIMARY KEY |
|
||||
| greptime | public | scripts | script | String | FIELD |
|
||||
| greptime | public | scripts | engine | String | FIELD |
|
||||
| greptime | public | scripts | timestamp | TimestampMillisecond | TIME INDEX |
|
||||
| greptime | public | scripts | gmt_created | TimestampMillisecond | FIELD |
|
||||
| greptime | public | scripts | gmt_modified | TimestampMillisecond | FIELD |
|
||||
+---------------+--------------+------------+--------------+----------------------+---------------+";
|
||||
|
||||
check_output_stream(output, expected).await;
|
||||
|
||||
let output = execute_sql_with(&instance, sql, query_ctx).await;
|
||||
let expected = "\
|
||||
+-----------------+----------------+---------------+-------------+-----------+
|
||||
| table_catalog | table_schema | table_name | column_name | data_type |
|
||||
+-----------------+----------------+---------------+-------------+-----------+
|
||||
| another_catalog | another_schema | another_table | i | Int64 |
|
||||
+-----------------+----------------+---------------+-------------+-----------+";
|
||||
+-----------------+----------------+---------------+-------------+-----------+---------------+
|
||||
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
|
||||
+-----------------+----------------+---------------+-------------+-----------+---------------+
|
||||
| another_catalog | another_schema | another_table | i | Int64 | TIME INDEX |
|
||||
+-----------------+----------------+---------------+-------------+-----------+---------------+";
|
||||
|
||||
check_output_stream(output, expected).await;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,9 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::consts::DEFAULT_CATALOG_NAME;
|
||||
use common_catalog::consts::{
|
||||
DEFAULT_CATALOG_NAME, SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
|
||||
};
|
||||
use common_datasource::file_format::{infer_schemas, FileFormat, Format};
|
||||
use common_datasource::lister::{Lister, Source};
|
||||
use common_datasource::object_store::build_backend;
|
||||
@@ -50,10 +52,6 @@ const COLUMN_NULLABLE_COLUMN: &str = "Null";
|
||||
const COLUMN_DEFAULT_COLUMN: &str = "Default";
|
||||
const COLUMN_SEMANTIC_TYPE_COLUMN: &str = "Semantic Type";
|
||||
|
||||
const SEMANTIC_TYPE_PRIMARY_KEY: &str = "PRIMARY KEY";
|
||||
const SEMANTIC_TYPE_FIELD: &str = "FIELD";
|
||||
const SEMANTIC_TYPE_TIME_INDEX: &str = "TIME INDEX";
|
||||
|
||||
const NULLABLE_YES: &str = "YES";
|
||||
const NULLABLE_NO: &str = "NO";
|
||||
|
||||
|
||||
@@ -39,17 +39,17 @@ order by table_schema, table_name;
|
||||
| greptime | my_db | foo | BASE TABLE | mito |
|
||||
+---------------+--------------+------------+------------+--------+
|
||||
|
||||
select table_catalog, table_schema, table_name, column_name, data_type
|
||||
select table_catalog, table_schema, table_name, column_name, data_type, semantic_type
|
||||
from information_schema.columns
|
||||
where table_catalog = 'greptime'
|
||||
and table_schema != 'public'
|
||||
order by table_schema, table_name;
|
||||
|
||||
+---------------+--------------+------------+-------------+-----------+
|
||||
| table_catalog | table_schema | table_name | column_name | data_type |
|
||||
+---------------+--------------+------------+-------------+-----------+
|
||||
| greptime | my_db | foo | ts | Int64 |
|
||||
+---------------+--------------+------------+-------------+-----------+
|
||||
+---------------+--------------+------------+-------------+-----------+---------------+
|
||||
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
|
||||
+---------------+--------------+------------+-------------+-----------+---------------+
|
||||
| greptime | my_db | foo | ts | Int64 | TIME INDEX |
|
||||
+---------------+--------------+------------+-------------+-----------+---------------+
|
||||
|
||||
use
|
||||
public;
|
||||
|
||||
@@ -20,7 +20,7 @@ where table_catalog = 'greptime'
|
||||
and table_schema != 'public'
|
||||
order by table_schema, table_name;
|
||||
|
||||
select table_catalog, table_schema, table_name, column_name, data_type
|
||||
select table_catalog, table_schema, table_name, column_name, data_type, semantic_type
|
||||
from information_schema.columns
|
||||
where table_catalog = 'greptime'
|
||||
and table_schema != 'public'
|
||||
|
||||
Reference in New Issue
Block a user