feat: add semantic_type to information_schema.columns (#1530)

This commit is contained in:
Ning Sun
2023-05-06 07:48:37 +00:00
committed by GitHub
parent 2c82ded975
commit d679cfcb53
6 changed files with 52 additions and 31 deletions

View File

@@ -15,6 +15,9 @@
use std::sync::Arc;
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::{
SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
};
use common_query::physical_plan::TaskContext;
use common_recordbatch::RecordBatch;
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
@@ -40,6 +43,7 @@ const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const DATA_TYPE: &str = "data_type";
const SEMANTIC_TYPE: &str = "semantic_type";
impl InformationSchemaColumns {
pub(super) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
@@ -49,6 +53,7 @@ impl InformationSchemaColumns {
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
]));
Self {
schema,
@@ -76,6 +81,7 @@ struct InformationSchemaColumnsBuilder {
table_names: StringVectorBuilder,
column_names: StringVectorBuilder,
data_types: StringVectorBuilder,
semantic_types: StringVectorBuilder,
}
impl InformationSchemaColumnsBuilder {
@@ -89,6 +95,7 @@ impl InformationSchemaColumnsBuilder {
table_names: StringVectorBuilder::with_capacity(42),
column_names: StringVectorBuilder::with_capacity(42),
data_types: StringVectorBuilder::with_capacity(42),
semantic_types: StringVectorBuilder::with_capacity(42),
}
}
@@ -100,14 +107,23 @@ impl InformationSchemaColumnsBuilder {
let Some(schema) = self.catalog_provider.schema(&schema_name).await? else { continue };
for table_name in schema.table_names().await? {
let Some(table) = schema.table(&table_name).await? else { continue };
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
for column in schema.column_schemas() {
for (idx, column) in schema.column_schemas().iter().enumerate() {
// Classify the column. The time index check comes first, so a time index
// column is reported as TIME INDEX even if its position also appears in
// `keys` (the table's `primary_key_indices`).
let semantic_type = if column.is_time_index() {
SEMANTIC_TYPE_TIME_INDEX
} else if keys.contains(&idx) {
// `idx` is the column's position within `schema.column_schemas()`.
SEMANTIC_TYPE_PRIMARY_KEY
} else {
// Neither the time index nor a primary key column.
SEMANTIC_TYPE_FIELD
};
// Append one row describing this column to the builders.
self.add_column(
&catalog_name,
&schema_name,
&table_name,
&column.name,
column.data_type.name(),
semantic_type,
);
}
}
@@ -123,12 +139,14 @@ impl InformationSchemaColumnsBuilder {
table_name: &str,
column_name: &str,
data_type: &str,
semantic_type: &str,
) {
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.column_names.push(Some(column_name));
self.data_types.push(Some(data_type));
self.semantic_types.push(Some(semantic_type));
}
fn finish(&mut self) -> Result<RecordBatch> {
@@ -138,6 +156,7 @@ impl InformationSchemaColumnsBuilder {
Arc::new(self.table_names.finish()),
Arc::new(self.column_names.finish()),
Arc::new(self.data_types.finish()),
Arc::new(self.semantic_types.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}

View File

@@ -30,3 +30,7 @@ pub const SCRIPTS_TABLE_ID: u32 = 1;
pub const MITO_ENGINE: &str = "mito";
pub const IMMUTABLE_FILE_ENGINE: &str = "file";
/// Semantic type label reported for columns that belong to a table's primary key.
pub const SEMANTIC_TYPE_PRIMARY_KEY: &str = "PRIMARY KEY";
/// Semantic type label reported for columns that are neither the time index
/// nor part of the primary key.
pub const SEMANTIC_TYPE_FIELD: &str = "FIELD";
/// Semantic type label reported for a table's time index column.
pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIME INDEX";

View File

@@ -1304,32 +1304,32 @@ async fn test_information_schema_dot_columns(instance: Arc<dyn MockInstance>) {
// Users can only see the information schema under the current catalog.
// This is a necessary requirement for GreptimeCloud.
let sql = "select table_catalog, table_schema, table_name, column_name, data_type from information_schema.columns order by table_name";
let sql = "select table_catalog, table_schema, table_name, column_name, data_type, semantic_type from information_schema.columns order by table_name";
let output = execute_sql(&instance, sql).await;
let expected = "\
+---------------+--------------+------------+--------------+----------------------+
| table_catalog | table_schema | table_name | column_name | data_type |
+---------------+--------------+------------+--------------+----------------------+
| greptime | public | numbers | number | UInt32 |
| greptime | public | scripts | schema | String |
| greptime | public | scripts | name | String |
| greptime | public | scripts | script | String |
| greptime | public | scripts | engine | String |
| greptime | public | scripts | timestamp | TimestampMillisecond |
| greptime | public | scripts | gmt_created | TimestampMillisecond |
| greptime | public | scripts | gmt_modified | TimestampMillisecond |
+---------------+--------------+------------+--------------+----------------------+";
+---------------+--------------+------------+--------------+----------------------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+---------------+--------------+------------+--------------+----------------------+---------------+
| greptime | public | numbers | number | UInt32 | PRIMARY KEY |
| greptime | public | scripts | schema | String | PRIMARY KEY |
| greptime | public | scripts | name | String | PRIMARY KEY |
| greptime | public | scripts | script | String | FIELD |
| greptime | public | scripts | engine | String | FIELD |
| greptime | public | scripts | timestamp | TimestampMillisecond | TIME INDEX |
| greptime | public | scripts | gmt_created | TimestampMillisecond | FIELD |
| greptime | public | scripts | gmt_modified | TimestampMillisecond | FIELD |
+---------------+--------------+------------+--------------+----------------------+---------------+";
check_output_stream(output, expected).await;
let output = execute_sql_with(&instance, sql, query_ctx).await;
let expected = "\
+-----------------+----------------+---------------+-------------+-----------+
| table_catalog | table_schema | table_name | column_name | data_type |
+-----------------+----------------+---------------+-------------+-----------+
| another_catalog | another_schema | another_table | i | Int64 |
+-----------------+----------------+---------------+-------------+-----------+";
+-----------------+----------------+---------------+-------------+-----------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+-----------------+----------------+---------------+-------------+-----------+---------------+
| another_catalog | another_schema | another_table | i | Int64 | TIME INDEX |
+-----------------+----------------+---------------+-------------+-----------+---------------+";
check_output_stream(output, expected).await;
}

View File

@@ -18,7 +18,9 @@ use std::collections::HashMap;
use std::sync::Arc;
use catalog::CatalogManagerRef;
use common_catalog::consts::DEFAULT_CATALOG_NAME;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
};
use common_datasource::file_format::{infer_schemas, FileFormat, Format};
use common_datasource::lister::{Lister, Source};
use common_datasource::object_store::build_backend;
@@ -50,10 +52,6 @@ const COLUMN_NULLABLE_COLUMN: &str = "Null";
const COLUMN_DEFAULT_COLUMN: &str = "Default";
const COLUMN_SEMANTIC_TYPE_COLUMN: &str = "Semantic Type";
const SEMANTIC_TYPE_PRIMARY_KEY: &str = "PRIMARY KEY";
const SEMANTIC_TYPE_FIELD: &str = "FIELD";
const SEMANTIC_TYPE_TIME_INDEX: &str = "TIME INDEX";
const NULLABLE_YES: &str = "YES";
const NULLABLE_NO: &str = "NO";

View File

@@ -39,17 +39,17 @@ order by table_schema, table_name;
| greptime | my_db | foo | BASE TABLE | mito |
+---------------+--------------+------------+------------+--------+
select table_catalog, table_schema, table_name, column_name, data_type
select table_catalog, table_schema, table_name, column_name, data_type, semantic_type
from information_schema.columns
where table_catalog = 'greptime'
and table_schema != 'public'
order by table_schema, table_name;
+---------------+--------------+------------+-------------+-----------+
| table_catalog | table_schema | table_name | column_name | data_type |
+---------------+--------------+------------+-------------+-----------+
| greptime | my_db | foo | ts | Int64 |
+---------------+--------------+------------+-------------+-----------+
+---------------+--------------+------------+-------------+-----------+---------------+
| table_catalog | table_schema | table_name | column_name | data_type | semantic_type |
+---------------+--------------+------------+-------------+-----------+---------------+
| greptime | my_db | foo | ts | Int64 | TIME INDEX |
+---------------+--------------+------------+-------------+-----------+---------------+
use
public;

View File

@@ -20,7 +20,7 @@ where table_catalog = 'greptime'
and table_schema != 'public'
order by table_schema, table_name;
select table_catalog, table_schema, table_name, column_name, data_type
select table_catalog, table_schema, table_name, column_name, data_type, semantic_type
from information_schema.columns
where table_catalog = 'greptime'
and table_schema != 'public'