feat: impl alter table in distributed mode (#572)

This commit is contained in:
Lei, HUANG
2022-11-22 15:17:25 +08:00
committed by GitHub
parent 0791c65149
commit c144a1b20e
26 changed files with 425 additions and 221 deletions

View File

@@ -0,0 +1,18 @@
[package]
name = "common-grpc-expr"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
[dependencies]
api = { path = "../../api" }
async-trait = "0.1"
common-base = { path = "../base" }
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
common-time = { path = "../time" }
common-catalog = { path = "../catalog" }
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }

View File

@@ -0,0 +1,234 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::alter_expr::Kind;
use api::v1::{AlterExpr, CreateExpr, DropColumns};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
use crate::error::{
ColumnNotFoundSnafu, CreateSchemaSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
MissingTimestampColumnSnafu, Result,
};
/// Convert an [`AlterExpr`] to an optional [`AlterTableRequest`]
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
match expr.kind {
Some(Kind::AddColumns(add_columns)) => {
let add_column_requests = add_columns
.add_columns
.into_iter()
.map(|ac| {
let column_def = ac.column_def.context(MissingFieldSnafu {
field: "column_def",
})?;
let schema =
column_def
.try_as_column_schema()
.context(InvalidColumnDefSnafu {
column: &column_def.name,
})?;
Ok(AddColumnRequest {
column_schema: schema,
is_key: ac.is_key,
})
})
.collect::<Result<Vec<_>>>()?;
let alter_kind = AlterKind::AddColumns {
columns: add_column_requests,
};
let request = AlterTableRequest {
catalog_name: expr.catalog_name,
schema_name: expr.schema_name,
table_name: expr.table_name,
alter_kind,
};
Ok(Some(request))
}
Some(Kind::DropColumns(DropColumns { drop_columns })) => {
let alter_kind = AlterKind::DropColumns {
names: drop_columns.into_iter().map(|c| c.name).collect(),
};
let request = AlterTableRequest {
catalog_name: expr.catalog_name,
schema_name: expr.schema_name,
table_name: expr.table_name,
alter_kind,
};
Ok(Some(request))
}
None => Ok(None),
}
}
pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
let column_schemas = expr
.column_defs
.iter()
.map(|x| {
x.try_as_column_schema()
.context(InvalidColumnDefSnafu { column: &x.name })
})
.collect::<Result<Vec<ColumnSchema>>>()?;
ensure!(
column_schemas
.iter()
.any(|column| column.name == expr.time_index),
MissingTimestampColumnSnafu {
msg: format!("CreateExpr: {:?}", expr)
}
);
let column_schemas = column_schemas
.into_iter()
.map(|column_schema| {
if column_schema.name == expr.time_index {
column_schema.with_time_index(true)
} else {
column_schema
}
})
.collect::<Vec<_>>();
Ok(Arc::new(
SchemaBuilder::try_from(column_schemas)
.context(CreateSchemaSnafu)?
.build()
.context(CreateSchemaSnafu)?,
))
}
pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
let schema = create_table_schema(&expr)?;
let primary_key_indices = expr
.primary_keys
.iter()
.map(|key| {
schema
.column_index_by_name(key)
.context(ColumnNotFoundSnafu {
column_name: key,
table_name: &expr.table_name,
})
})
.collect::<Result<Vec<usize>>>()?;
let catalog_name = expr
.catalog_name
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
let schema_name = expr
.schema_name
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
let region_ids = if expr.region_ids.is_empty() {
vec![0]
} else {
expr.region_ids
};
Ok(CreateTableRequest {
id: table_id,
catalog_name,
schema_name,
table_name: expr.table_name,
desc: expr.desc,
schema,
region_numbers: region_ids,
primary_key_indices,
create_if_not_exists: expr.create_if_not_exists,
table_options: expr.table_options,
})
}
#[cfg(test)]
mod tests {
use api::v1::{AddColumn, AddColumns, ColumnDataType, ColumnDef, DropColumn};
use datatypes::prelude::ConcreteDataType;
use super::*;
#[test]
fn test_alter_expr_to_request() {
let expr = AlterExpr {
catalog_name: None,
schema_name: None,
table_name: "monitor".to_string(),
kind: Some(Kind::AddColumns(AddColumns {
add_columns: vec![AddColumn {
column_def: Some(ColumnDef {
name: "mem_usage".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: false,
default_constraint: None,
}),
is_key: false,
}],
})),
};
let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
assert_eq!(None, alter_request.catalog_name);
assert_eq!(None, alter_request.schema_name);
assert_eq!("monitor".to_string(), alter_request.table_name);
let add_column = match alter_request.alter_kind {
AlterKind::AddColumns { mut columns } => columns.pop().unwrap(),
_ => unreachable!(),
};
assert!(!add_column.is_key);
assert_eq!("mem_usage", add_column.column_schema.name);
assert_eq!(
ConcreteDataType::float64_datatype(),
add_column.column_schema.data_type
);
}
#[test]
fn test_drop_column_expr() {
let expr = AlterExpr {
catalog_name: Some("test_catalog".to_string()),
schema_name: Some("test_schema".to_string()),
table_name: "monitor".to_string(),
kind: Some(Kind::DropColumns(DropColumns {
drop_columns: vec![DropColumn {
name: "mem_usage".to_string(),
}],
})),
};
let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
assert_eq!(Some("test_catalog".to_string()), alter_request.catalog_name);
assert_eq!(Some("test_schema".to_string()), alter_request.schema_name);
assert_eq!("monitor".to_string(), alter_request.table_name);
let mut drop_names = match alter_request.alter_kind {
AlterKind::DropColumns { names } => names,
_ => unreachable!(),
};
assert_eq!(1, drop_names.len());
assert_eq!("mem_usage".to_string(), drop_names.pop().unwrap());
}
}

View File

@@ -0,0 +1,122 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use api::DecodeError;
use common_error::ext::ErrorExt;
use common_error::prelude::{Snafu, StatusCode};
use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Column `{}` not found in table `{}`", column_name, table_name))]
ColumnNotFound {
column_name: String,
table_name: String,
},
#[snafu(display("Failed to convert bytes to insert batch, source: {}", source))]
DecodeInsert { source: DecodeError },
#[snafu(display("Illegal insert data"))]
IllegalInsertData,
#[snafu(display("Column datatype error, source: {}", source))]
ColumnDataType {
#[snafu(backtrace)]
source: api::error::Error,
},
#[snafu(display("Failed to create schema when creating table, source: {}", source))]
CreateSchema {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Duplicated timestamp column in gRPC requests, exists {}, duplicated: {}",
exists,
duplicated
))]
DuplicatedTimestampColumn {
exists: String,
duplicated: String,
backtrace: Backtrace,
},
#[snafu(display("Missing timestamp column, msg: {}", msg))]
MissingTimestampColumn { msg: String, backtrace: Backtrace },
#[snafu(display("Invalid column proto: {}", err_msg))]
InvalidColumnProto {
err_msg: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to create vector, source: {}", source))]
CreateVector {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, backtrace: Backtrace },
#[snafu(display("Invalid column default constraint, source: {}", source))]
ColumnDefaultConstraint {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Invalid column proto definition, column: {}, source: {}",
column,
source
))]
InvalidColumnDef {
column: String,
#[snafu(backtrace)]
source: api::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ColumnNotFound { .. } => StatusCode::TableColumnNotFound,
Error::DecodeInsert { .. } | Error::IllegalInsertData { .. } => {
StatusCode::InvalidArguments
}
Error::ColumnDataType { .. } => StatusCode::Internal,
Error::CreateSchema { .. }
| Error::DuplicatedTimestampColumn { .. }
| Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments,
Error::InvalidColumnProto { .. } => StatusCode::InvalidArguments,
Error::CreateVector { .. } => StatusCode::InvalidArguments,
Error::MissingField { .. } => StatusCode::InvalidArguments,
Error::ColumnDefaultConstraint { source, .. } => source.status_code(),
Error::InvalidColumnDef { source, .. } => source.status_code(),
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -0,0 +1,794 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::codec::InsertBatch;
use api::v1::column::{SemanticType, Values};
use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
use common_base::BitVec;
use common_time::timestamp::Timestamp;
use common_time::{Date, DateTime};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::{ValueRef, VectorRef};
use datatypes::schema::SchemaRef;
use datatypes::value::Value;
use datatypes::vectors::VectorBuilder;
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
use table::Table;
use crate::error::{
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DecodeInsertSnafu,
DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, InvalidColumnProtoSnafu,
MissingTimestampColumnSnafu, Result,
};
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
#[inline]
fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnDef {
ColumnDef {
name: column_name.to_string(),
datatype,
is_nullable: nullable,
default_constraint: None,
}
}
pub fn find_new_columns(
schema: &SchemaRef,
insert_batches: &[InsertBatch],
) -> Result<Option<AddColumns>> {
let mut columns_to_add = Vec::default();
let mut new_columns: HashSet<String> = HashSet::default();
for InsertBatch { columns, row_count } in insert_batches {
if *row_count == 0 || columns.is_empty() {
continue;
}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if schema.column_schema_by_name(column_name).is_none()
&& !new_columns.contains(column_name)
{
let column_def = Some(build_column_def(column_name, *datatype, true));
columns_to_add.push(AddColumn {
column_def,
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
});
new_columns.insert(column_name.to_string());
}
}
}
if columns_to_add.is_empty() {
Ok(None)
} else {
Ok(Some(AddColumns {
add_columns: columns_to_add,
}))
}
}
/// Build a alter table rqeusts that adding new columns.
#[inline]
pub fn build_alter_table_request(
table_name: &str,
columns: Vec<AddColumnRequest>,
) -> AlterTableRequest {
AlterTableRequest {
catalog_name: None,
schema_name: None,
table_name: table_name.to_string(),
alter_kind: AlterKind::AddColumns { columns },
}
}
pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
let wrapper = ColumnDataTypeWrapper::try_new(column.datatype).context(ColumnDataTypeSnafu)?;
let column_datatype = wrapper.datatype();
let rows = rows as usize;
let mut vector = VectorBuilder::with_capacity(wrapper.into(), rows);
if let Some(values) = &column.values {
let values = collect_column_values(column_datatype, values);
let mut values_iter = values.into_iter();
let null_mask = BitVec::from_slice(&column.null_mask);
let mut nulls_iter = null_mask.iter().by_vals().fuse();
for i in 0..rows {
if let Some(true) = nulls_iter.next() {
vector.push_null();
} else {
let value_ref = values_iter.next().context(InvalidColumnProtoSnafu {
err_msg: format!(
"value not found at position {} of column {}",
i, &column.column_name
),
})?;
vector.try_push_ref(value_ref).context(CreateVectorSnafu)?;
}
}
} else {
(0..rows).for_each(|_| vector.push_null());
}
Ok(vector.finish())
}
fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef> {
macro_rules! collect_values {
($value: expr, $mapper: expr) => {
$value.iter().map($mapper).collect::<Vec<ValueRef>>()
};
}
match column_datatype {
ColumnDataType::Boolean => collect_values!(values.bool_values, |v| ValueRef::from(*v)),
ColumnDataType::Int8 => collect_values!(values.i8_values, |v| ValueRef::from(*v as i8)),
ColumnDataType::Int16 => {
collect_values!(values.i16_values, |v| ValueRef::from(*v as i16))
}
ColumnDataType::Int32 => {
collect_values!(values.i32_values, |v| ValueRef::from(*v))
}
ColumnDataType::Int64 => {
collect_values!(values.i64_values, |v| ValueRef::from(*v as i64))
}
ColumnDataType::Uint8 => {
collect_values!(values.u8_values, |v| ValueRef::from(*v as u8))
}
ColumnDataType::Uint16 => {
collect_values!(values.u16_values, |v| ValueRef::from(*v as u16))
}
ColumnDataType::Uint32 => {
collect_values!(values.u32_values, |v| ValueRef::from(*v))
}
ColumnDataType::Uint64 => {
collect_values!(values.u64_values, |v| ValueRef::from(*v as u64))
}
ColumnDataType::Float32 => collect_values!(values.f32_values, |v| ValueRef::from(*v)),
ColumnDataType::Float64 => collect_values!(values.f64_values, |v| ValueRef::from(*v)),
ColumnDataType::Binary => {
collect_values!(values.binary_values, |v| ValueRef::from(v.as_slice()))
}
ColumnDataType::String => {
collect_values!(values.string_values, |v| ValueRef::from(v.as_str()))
}
ColumnDataType::Date => {
collect_values!(values.date_values, |v| ValueRef::Date(Date::new(*v)))
}
ColumnDataType::Datetime => {
collect_values!(values.datetime_values, |v| ValueRef::DateTime(
DateTime::new(*v)
))
}
ColumnDataType::Timestamp => {
collect_values!(values.ts_millis_values, |v| ValueRef::Timestamp(
Timestamp::from_millis(*v)
))
}
}
}
/// Try to build create table request from insert data.
pub fn build_create_expr_from_insertion(
catalog_name: &str,
schema_name: &str,
table_id: Option<TableId>,
table_name: &str,
insert_batches: &[InsertBatch],
) -> Result<CreateExpr> {
let mut new_columns: HashSet<String> = HashSet::default();
let mut column_defs = Vec::default();
let mut primary_key_indices = Vec::default();
let mut timestamp_index = usize::MAX;
for InsertBatch { columns, row_count } in insert_batches {
if *row_count == 0 || columns.is_empty() {
continue;
}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if !new_columns.contains(column_name) {
let mut is_nullable = true;
match *semantic_type {
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
TIMESTAMP_SEMANTIC_TYPE => {
ensure!(
timestamp_index == usize::MAX,
DuplicatedTimestampColumnSnafu {
exists: &columns[timestamp_index].column_name,
duplicated: column_name,
}
);
timestamp_index = column_defs.len();
// Timestamp column must not be null.
is_nullable = false;
}
_ => {}
}
let column_def = build_column_def(column_name, *datatype, is_nullable);
column_defs.push(column_def);
new_columns.insert(column_name.to_string());
}
}
ensure!(
timestamp_index != usize::MAX,
MissingTimestampColumnSnafu { msg: table_name }
);
let timestamp_field_name = columns[timestamp_index].column_name.clone();
let primary_keys = primary_key_indices
.iter()
.map(|idx| columns[*idx].column_name.clone())
.collect::<Vec<_>>();
let expr = CreateExpr {
catalog_name: Some(catalog_name.to_string()),
schema_name: Some(schema_name.to_string()),
table_name: table_name.to_string(),
desc: Some("Created on insertion".to_string()),
column_defs,
time_index: timestamp_field_name,
primary_keys,
create_if_not_exists: true,
table_options: Default::default(),
table_id,
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
};
return Ok(expr);
}
IllegalInsertDataSnafu.fail()
}
pub fn insertion_expr_to_request(
catalog_name: &str,
schema_name: &str,
table_name: &str,
insert_batches: Vec<InsertBatch>,
table: Arc<dyn Table>,
) -> Result<InsertRequest> {
let schema = table.schema();
let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());
for InsertBatch { columns, row_count } in insert_batches {
for Column {
column_name,
values,
null_mask,
..
} in columns
{
let values = match values {
Some(vals) => vals,
None => continue,
};
let column = column_name.clone();
let vector_builder = match columns_builders.entry(column) {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => {
let column_schema = schema.column_schema_by_name(&column_name).context(
ColumnNotFoundSnafu {
column_name: &column_name,
table_name,
},
)?;
let data_type = &column_schema.data_type;
entry.insert(VectorBuilder::with_capacity(
data_type.clone(),
row_count as usize,
))
}
};
add_values_to_builder(vector_builder, values, row_count as usize, null_mask)?;
}
}
let columns_values = columns_builders
.into_iter()
.map(|(column_name, mut vector_builder)| (column_name, vector_builder.finish()))
.collect();
Ok(InsertRequest {
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
table_name: table_name.to_string(),
columns_values,
})
}
#[inline]
pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
bytes_vec
.iter()
.map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
.collect()
}
fn add_values_to_builder(
builder: &mut VectorBuilder,
values: Values,
row_count: usize,
null_mask: Vec<u8>,
) -> Result<()> {
let data_type = builder.data_type();
let values = convert_values(&data_type, values);
if null_mask.is_empty() {
ensure!(values.len() == row_count, IllegalInsertDataSnafu);
values.iter().for_each(|value| {
builder.push(value);
});
} else {
let null_mask = BitVec::from_vec(null_mask);
ensure!(
null_mask.count_ones() + values.len() == row_count,
IllegalInsertDataSnafu
);
let mut idx_of_values = 0;
for idx in 0..row_count {
match is_null(&null_mask, idx) {
Some(true) => builder.push(&Value::Null),
_ => {
builder.push(&values[idx_of_values]);
idx_of_values += 1
}
}
}
}
Ok(())
}
fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
// TODO(fys): use macros to optimize code
match data_type {
ConcreteDataType::Int64(_) => values
.i64_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Float64(_) => values
.f64_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::String(_) => values
.string_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Boolean(_) => values
.bool_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Int8(_) => values.i8_values.into_iter().map(|val| val.into()).collect(),
ConcreteDataType::Int16(_) => values
.i16_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Int32(_) => values
.i32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::UInt8(_) => values.u8_values.into_iter().map(|val| val.into()).collect(),
ConcreteDataType::UInt16(_) => values
.u16_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::UInt32(_) => values
.u32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::UInt64(_) => values
.u64_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Float32(_) => values
.f32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Binary(_) => values
.binary_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::DateTime(_) => values
.i64_values
.into_iter()
.map(|v| Value::DateTime(v.into()))
.collect(),
ConcreteDataType::Date(_) => values
.i32_values
.into_iter()
.map(|v| Value::Date(v.into()))
.collect(),
ConcreteDataType::Timestamp(_) => values
.ts_millis_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::from_millis(v)))
.collect(),
ConcreteDataType::Null(_) => unreachable!(),
ConcreteDataType::List(_) => unreachable!(),
}
}
fn is_null(null_mask: &BitVec, idx: usize) -> Option<bool> {
null_mask.get(idx).as_deref().copied()
}
#[cfg(test)]
mod tests {
use std::any::Any;
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::codec::InsertBatch;
use api::v1::column::{self, SemanticType, Values};
use api::v1::{insert_expr, Column, ColumnDataType};
use common_base::BitVec;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
use common_time::timestamp::Timestamp;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use datatypes::value::Value;
use snafu::ResultExt;
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::Table;
use super::{
build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
};
use crate::error;
use crate::error::ColumnDataTypeSnafu;
#[inline]
fn build_column_schema(
column_name: &str,
datatype: i32,
nullable: bool,
) -> error::Result<ColumnSchema> {
let datatype_wrapper =
ColumnDataTypeWrapper::try_new(datatype).context(ColumnDataTypeSnafu)?;
Ok(ColumnSchema::new(
column_name,
datatype_wrapper.into(),
nullable,
))
}
#[test]
fn test_build_create_table_request() {
let table_id = Some(10);
let table_name = "test_metric";
assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());
let mock_batch_bytes = mock_insert_batches();
let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
let create_expr =
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
.unwrap();
assert_eq!(table_id, create_expr.table_id);
assert_eq!(table_name, create_expr.table_name);
assert_eq!(Some("Created on insertion".to_string()), create_expr.desc);
assert_eq!(
vec![create_expr.column_defs[0].name.clone()],
create_expr.primary_keys
);
let column_defs = create_expr.column_defs;
assert_eq!(column_defs[3].name, create_expr.time_index);
assert_eq!(4, column_defs.len());
assert_eq!(
ConcreteDataType::string_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "host")
.unwrap()
.datatype
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "cpu")
.unwrap()
.datatype
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "memory")
.unwrap()
.datatype
)
.unwrap()
)
);
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
.iter()
.find(|c| c.name == "ts")
.unwrap()
.datatype
)
.unwrap()
)
);
}
#[test]
fn test_find_new_columns() {
let mut columns = Vec::with_capacity(1);
let cpu_column = build_column_schema("cpu", 10, true).unwrap();
let ts_column = build_column_schema("ts", 15, false)
.unwrap()
.with_time_index(true);
columns.push(cpu_column);
columns.push(ts_column);
let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
let mock_insert_bytes = mock_insert_batches();
let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
assert_eq!(2, add_columns.add_columns.len());
let host_column = &add_columns.add_columns[0];
assert!(host_column.is_key);
assert_eq!(
ConcreteDataType::string_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(host_column.column_def.as_ref().unwrap().datatype)
.unwrap()
)
);
let memory_column = &add_columns.add_columns[1];
assert!(!memory_column.is_key);
assert_eq!(
ConcreteDataType::float64_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(memory_column.column_def.as_ref().unwrap().datatype)
.unwrap()
)
);
}
#[test]
fn test_insertion_expr_to_request() {
let table: Arc<dyn Table> = Arc::new(DemoTable {});
let values = insert_expr::Values {
values: mock_insert_batches(),
};
let insert_batches = insert_batches(&values.values).unwrap();
let insert_req =
insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();
assert_eq!("greptime", insert_req.catalog_name);
assert_eq!("public", insert_req.schema_name);
assert_eq!("demo", insert_req.table_name);
let host = insert_req.columns_values.get("host").unwrap();
assert_eq!(Value::String("host1".into()), host.get(0));
assert_eq!(Value::String("host2".into()), host.get(1));
let cpu = insert_req.columns_values.get("cpu").unwrap();
assert_eq!(Value::Float64(0.31.into()), cpu.get(0));
assert_eq!(Value::Null, cpu.get(1));
let memory = insert_req.columns_values.get("memory").unwrap();
assert_eq!(Value::Null, memory.get(0));
assert_eq!(Value::Float64(0.1.into()), memory.get(1));
let ts = insert_req.columns_values.get("ts").unwrap();
assert_eq!(Value::Timestamp(Timestamp::from_millis(100)), ts.get(0));
assert_eq!(Value::Timestamp(Timestamp::from_millis(101)), ts.get(1));
}
#[test]
fn test_convert_values() {
let data_type = ConcreteDataType::float64_datatype();
let values = Values {
f64_values: vec![0.1, 0.2, 0.3],
..Default::default()
};
let result = convert_values(&data_type, values);
assert_eq!(
vec![
Value::Float64(0.1.into()),
Value::Float64(0.2.into()),
Value::Float64(0.3.into())
],
result
);
}
#[test]
fn test_is_null() {
let null_mask = BitVec::from_slice(&[0b0000_0001, 0b0000_1000]);
assert_eq!(Some(true), is_null(&null_mask, 0));
assert_eq!(Some(false), is_null(&null_mask, 1));
assert_eq!(Some(false), is_null(&null_mask, 10));
assert_eq!(Some(true), is_null(&null_mask, 11));
assert_eq!(Some(false), is_null(&null_mask, 12));
assert_eq!(None, is_null(&null_mask, 16));
assert_eq!(None, is_null(&null_mask, 99));
}
struct DemoTable;
#[async_trait::async_trait]
impl Table for DemoTable {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
let column_schemas = vec![
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), true)
.with_time_index(true),
];
Arc::new(
SchemaBuilder::try_from(column_schemas)
.unwrap()
.build()
.unwrap(),
)
}
fn table_info(&self) -> TableInfoRef {
unimplemented!()
}
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> TableResult<PhysicalPlanRef> {
unimplemented!();
}
}
fn mock_insert_batches() -> Vec<Vec<u8>> {
let row_count = 2;
let host_vals = column::Values {
string_values: vec!["host1".to_string(), "host2".to_string()],
..Default::default()
};
let host_column = Column {
column_name: "host".to_string(),
semantic_type: TAG_SEMANTIC_TYPE,
values: Some(host_vals),
null_mask: vec![0],
datatype: ColumnDataType::String as i32,
};
let cpu_vals = column::Values {
f64_values: vec![0.31],
..Default::default()
};
let cpu_column = Column {
column_name: "cpu".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(cpu_vals),
null_mask: vec![2],
datatype: ColumnDataType::Float64 as i32,
};
let mem_vals = column::Values {
f64_values: vec![0.1],
..Default::default()
};
let mem_column = Column {
column_name: "memory".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(mem_vals),
null_mask: vec![1],
datatype: ColumnDataType::Float64 as i32,
};
let ts_vals = column::Values {
ts_millis_values: vec![100, 101],
..Default::default()
};
let ts_column = Column {
column_name: "ts".to_string(),
semantic_type: TIMESTAMP_SEMANTIC_TYPE,
values: Some(ts_vals),
null_mask: vec![0],
datatype: ColumnDataType::Timestamp as i32,
};
let insert_batch = InsertBatch {
columns: vec![host_column, cpu_column, mem_column, ts_column],
row_count,
};
vec![insert_batch.into()]
}
}

View File

@@ -0,0 +1,24 @@
#![feature(assert_matches)]
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod alter;
pub mod error;
mod insert;
pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
pub use insert::{
build_alter_table_request, build_create_expr_from_insertion, column_to_vector,
find_new_columns, insert_batches, insertion_expr_to_request,
};