feat: Move create table logic to frontend (#455)

* refactor: dependency, from frontend depends on datanode to datanode depends on frontend

* wip: start frontend in datanode

* wip: migrate create database to frontend

* wip: impl alter table

* fix: CR comments

* feat: add table id and region ids field to CreateExpr

* chore: rebase develop

* refactor: frontend catalog should set from datanode

* feat: gRPC AddColumn request support add multi columns

* wip: move create table and create-on-insertion to frontend

* wip: error handling

* fix: some unit tests

* fix: all unit tests

* chore: merge develop

* feat: add create/alter-on-insertion to dist_insert/sql_dist_insert

* fix: add region number/catalog/schema to InsertExpr

* feat: add handle_create_table/handle_create_database...

* fix: remove catalog from insert expr

* fix: CR comments

* fix: when running in standalone mode, mysql opts and postgres opts should pass to frontend so that auctually running service can change the port to listen on

* refactor: add a standalone subcommand, move frontend start stuff to cmd package

* chore: optimize create table failure logs

* docs: change readme

* docs: update readme
This commit is contained in:
Lei, Huang
2022-11-14 10:54:35 +08:00
committed by GitHub
parent 488eabce4a
commit fae331d2ba
52 changed files with 1356 additions and 721 deletions

View File

@@ -5,49 +5,46 @@ use std::{
sync::Arc,
};
use api::{
helper::ColumnDataTypeWrapper,
v1::{
codec::InsertBatch,
column::{SemanticType, Values},
Column,
},
use api::v1::{
codec::InsertBatch,
column::{SemanticType, Values},
AddColumns, Column,
};
use api::v1::{AddColumn, ColumnDef, CreateExpr};
use common_base::BitVec;
use common_time::timestamp::Timestamp;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use datatypes::schema::SchemaRef;
use datatypes::{data_type::ConcreteDataType, value::Value, vectors::VectorBuilder};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::{
requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest, InsertRequest},
requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest},
Table,
};
use crate::error::{
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateSchemaSnafu, DecodeInsertSnafu,
DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, MissingTimestampColumnSnafu, Result,
ColumnNotFoundSnafu, DecodeInsertSnafu, DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu,
MissingTimestampColumnSnafu, Result,
};
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
#[inline]
fn build_column_schema(column_name: &str, datatype: i32, nullable: bool) -> Result<ColumnSchema> {
let datatype_wrapper = ColumnDataTypeWrapper::try_new(datatype).context(ColumnDataTypeSnafu)?;
Ok(ColumnSchema::new(
column_name,
datatype_wrapper.into(),
nullable,
))
fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnDef {
ColumnDef {
name: column_name.to_string(),
datatype,
is_nullable: nullable,
default_constraint: None,
}
}
pub fn find_new_columns(
schema: &SchemaRef,
insert_batches: &[InsertBatch],
) -> Result<Option<Vec<AddColumnRequest>>> {
let mut requests = Vec::default();
) -> Result<Option<AddColumns>> {
let mut columns_to_add = Vec::default();
let mut new_columns: HashSet<String> = HashSet::default();
for InsertBatch { columns, row_count } in insert_batches {
@@ -65,10 +62,9 @@ pub fn find_new_columns(
if schema.column_schema_by_name(column_name).is_none()
&& !new_columns.contains(column_name)
{
let column_schema = build_column_schema(column_name, *datatype, true)?;
requests.push(AddColumnRequest {
column_schema,
let column_def = Some(build_column_def(column_name, *datatype, true));
columns_to_add.push(AddColumn {
column_def,
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
});
new_columns.insert(column_name.to_string());
@@ -76,10 +72,12 @@ pub fn find_new_columns(
}
}
if requests.is_empty() {
if columns_to_add.is_empty() {
Ok(None)
} else {
Ok(Some(requests))
Ok(Some(AddColumns {
add_columns: columns_to_add,
}))
}
}
@@ -98,15 +96,15 @@ pub fn build_alter_table_request(
}
/// Try to build create table request from insert data.
pub fn build_create_table_request(
pub fn build_create_expr_from_insertion(
catalog_name: &str,
schema_name: &str,
table_id: TableId,
table_id: Option<TableId>,
table_name: &str,
insert_batches: &[InsertBatch],
) -> Result<CreateTableRequest> {
) -> Result<CreateExpr> {
let mut new_columns: HashSet<String> = HashSet::default();
let mut column_schemas = Vec::default();
let mut column_defs = Vec::default();
let mut primary_key_indices = Vec::default();
let mut timestamp_index = usize::MAX;
@@ -124,9 +122,8 @@ pub fn build_create_table_request(
{
if !new_columns.contains(column_name) {
let mut is_nullable = true;
let mut is_time_index = false;
match *semantic_type {
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_schemas.len()),
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
TIMESTAMP_SEMANTIC_TYPE => {
ensure!(
timestamp_index == usize::MAX,
@@ -135,42 +132,42 @@ pub fn build_create_table_request(
duplicated: column_name,
}
);
timestamp_index = column_schemas.len();
is_time_index = true;
timestamp_index = column_defs.len();
// Timestamp column must not be null.
is_nullable = false;
}
_ => {}
}
let column_schema = build_column_schema(column_name, *datatype, is_nullable)?
.with_time_index(is_time_index);
column_schemas.push(column_schema);
let column_def = build_column_def(column_name, *datatype, is_nullable);
column_defs.push(column_def);
new_columns.insert(column_name.to_string());
}
}
ensure!(timestamp_index != usize::MAX, MissingTimestampColumnSnafu);
let timestamp_field_name = columns[timestamp_index].column_name.clone();
let schema = Arc::new(
SchemaBuilder::try_from(column_schemas)
.unwrap()
.build()
.context(CreateSchemaSnafu)?,
);
let primary_keys = primary_key_indices
.iter()
.map(|idx| columns[*idx].column_name.clone())
.collect::<Vec<_>>();
return Ok(CreateTableRequest {
id: table_id,
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
let expr = CreateExpr {
catalog_name: Some(catalog_name.to_string()),
schema_name: Some(schema_name.to_string()),
table_name: table_name.to_string(),
desc: None,
schema,
desc: Some("Created on insertion".to_string()),
column_defs,
time_index: timestamp_field_name,
primary_keys,
create_if_not_exists: true,
primary_key_indices,
table_options: HashMap::new(),
region_numbers: vec![0],
});
table_options: Default::default(),
table_id,
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
};
return Ok(expr);
}
IllegalInsertDataSnafu.fail()
@@ -233,7 +230,7 @@ pub fn insertion_expr_to_request(
}
#[inline]
pub fn insert_batches(bytes_vec: Vec<Vec<u8>>) -> Result<Vec<InsertBatch>> {
pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
bytes_vec
.iter()
.map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
@@ -365,6 +362,7 @@ mod tests {
use std::any::Any;
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::{
codec::InsertBatch,
column::{self, SemanticType, Values},
@@ -379,50 +377,114 @@ mod tests {
schema::{ColumnSchema, SchemaBuilder, SchemaRef},
value::Value,
};
use snafu::ResultExt;
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::Table;
use super::{
build_column_schema, build_create_table_request, convert_values, find_new_columns,
insert_batches, insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE,
TIMESTAMP_SEMANTIC_TYPE,
build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
};
use crate::error;
use crate::error::ColumnDataTypeSnafu;
#[inline]
fn build_column_schema(
column_name: &str,
datatype: i32,
nullable: bool,
) -> error::Result<ColumnSchema> {
let datatype_wrapper =
ColumnDataTypeWrapper::try_new(datatype).context(ColumnDataTypeSnafu)?;
Ok(ColumnSchema::new(
column_name,
datatype_wrapper.into(),
nullable,
))
}
#[test]
fn test_build_create_table_request() {
let table_id = 10;
let table_id = Some(10);
let table_name = "test_metric";
assert!(build_create_table_request("", "", table_id, table_name, &[]).is_err());
assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());
let insert_batches = insert_batches(mock_insert_batches()).unwrap();
let mock_batch_bytes = mock_insert_batches();
let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
let req =
build_create_table_request("", "", table_id, table_name, &insert_batches).unwrap();
assert_eq!(table_id, req.id);
assert_eq!(table_name, req.table_name);
assert!(req.desc.is_none());
assert_eq!(vec![0], req.primary_key_indices);
let create_expr =
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
.unwrap();
assert_eq!(table_id, create_expr.table_id);
assert_eq!(table_name, create_expr.table_name);
assert_eq!(Some("Created on insertion".to_string()), create_expr.desc);
assert_eq!(
vec![create_expr.column_defs[0].name.clone()],
create_expr.primary_keys
);
let column_defs = create_expr.column_defs;
assert_eq!(column_defs[3].name, create_expr.time_index);
assert_eq!(4, column_defs.len());
let schema = req.schema;
assert_eq!(Some(3), schema.timestamp_index());
assert_eq!(4, schema.num_columns());
assert_eq!(
ConcreteDataType::string_datatype(),
schema.column_schema_by_name("host").unwrap().data_type
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "host")
.unwrap()
.datatype
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
schema.column_schema_by_name("cpu").unwrap().data_type
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "cpu")
.unwrap()
.datatype
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
schema.column_schema_by_name("memory").unwrap().data_type
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "memory")
.unwrap()
.datatype
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
schema.column_schema_by_name("ts").unwrap().data_type
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "ts")
.unwrap()
.datatype
)
.unwrap()
)
);
}
@@ -440,22 +502,32 @@ mod tests {
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
let insert_batches = insert_batches(mock_insert_batches()).unwrap();
let new_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
let mock_insert_bytes = mock_insert_batches();
let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
assert_eq!(2, new_columns.len());
let host_column = &new_columns[0];
assert_eq!(2, add_columns.add_columns.len());
let host_column = &add_columns.add_columns[0];
assert!(host_column.is_key);
assert_eq!(
ConcreteDataType::string_datatype(),
host_column.column_schema.data_type
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(host_column.column_def.as_ref().unwrap().datatype)
.unwrap()
)
);
let memory_column = &new_columns[1];
let memory_column = &add_columns.add_columns[1];
assert!(!memory_column.is_key);
assert_eq!(
ConcreteDataType::float64_datatype(),
memory_column.column_schema.data_type
)
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(memory_column.column_def.as_ref().unwrap().datatype)
.unwrap()
)
);
}
#[test]
@@ -465,7 +537,7 @@ mod tests {
let values = insert_expr::Values {
values: mock_insert_batches(),
};
let insert_batches = insert_batches(values.values).unwrap();
let insert_batches = insert_batches(&values.values).unwrap();
let insert_req =
insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();

View File

@@ -1,6 +1,6 @@
pub mod error;
mod insert;
pub use insert::{
build_alter_table_request, build_create_table_request, find_new_columns, insert_batches,
build_alter_table_request, build_create_expr_from_insertion, find_new_columns, insert_batches,
insertion_expr_to_request,
};