mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-14 12:00:40 +00:00
refactor: explicitly define json struct to ingest jsonbench data (#7462)
ingest jsonbench data Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -40,4 +40,8 @@ impl Dialect for GreptimeDbDialect {
|
||||
/// Dialect capability flag: this dialect unconditionally answers `true`
/// when asked whether filtering during aggregation is supported.
fn supports_filter_during_aggregation(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// Dialect capability flag: this dialect unconditionally answers `true`
/// when asked whether struct literals are supported.
// NOTE(review): presumably this is what lets the parser accept the
// `fields = Struct<...>` JSON datatype option — confirm against sqlparser.
fn supports_struct_literal(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,6 +215,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
/// The options given for a JSON column could not be turned into a JSON
/// structure setting. `reason` is interpolated into the display message.
#[snafu(display("Invalid JSON structure setting, reason: {reason}"))]
|
||||
InvalidJsonStructureSetting {
|
||||
// Human-readable explanation of what is wrong with the setting.
reason: String,
|
||||
// Filled in implicitly by snafu; not passed by the caller.
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serialize column default constraint"))]
|
||||
SerializeColumnDefaultConstraint {
|
||||
#[snafu(implicit)]
|
||||
@@ -374,6 +381,7 @@ impl ErrorExt for Error {
|
||||
|
||||
InvalidColumnOption { .. }
|
||||
| InvalidExprAsOptionValue { .. }
|
||||
| InvalidJsonStructureSetting { .. }
|
||||
| InvalidDatabaseName { .. }
|
||||
| InvalidDatabaseOption { .. }
|
||||
| ColumnTypeMismatch { .. }
|
||||
|
||||
@@ -40,16 +40,17 @@ pub(super) fn parse_json_datatype_options(parser: &mut Parser<'_>) -> Result<Opt
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use sqlparser::ast::DataType;
|
||||
use sqlparser::ast::{DataType, Expr, Ident, StructField};
|
||||
|
||||
use crate::dialect::GreptimeDbDialect;
|
||||
use crate::parser::{ParseOptions, ParserContext};
|
||||
use crate::statements::OptionMap;
|
||||
use crate::statements::create::{
|
||||
Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FORMAT,
|
||||
JSON_OPT_UNSTRUCTURED_KEYS,
|
||||
Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FIELDS,
|
||||
JSON_OPT_FORMAT, JSON_OPT_UNSTRUCTURED_KEYS,
|
||||
};
|
||||
use crate::statements::statement::Statement;
|
||||
use crate::util::OptionValue;
|
||||
|
||||
#[test]
|
||||
fn test_parse_json_datatype_options() {
|
||||
@@ -77,6 +78,42 @@ mod tests {
|
||||
|
||||
let sql = r#"
|
||||
CREATE TABLE json_data (
|
||||
my_json JSON(format = "partial", fields = Struct<i Int, "o.a" String, "o.b" String, `x.y.z` Float64>),
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
)"#;
|
||||
let options = parse(sql).unwrap();
|
||||
assert_eq!(options.len(), 2);
|
||||
let option = options.value(JSON_OPT_FIELDS);
|
||||
let expected = OptionValue::try_new(Expr::Struct {
|
||||
values: vec![],
|
||||
fields: vec![
|
||||
StructField {
|
||||
field_name: Some(Ident::new("i")),
|
||||
field_type: DataType::Int(None),
|
||||
options: None,
|
||||
},
|
||||
StructField {
|
||||
field_name: Some(Ident::with_quote('"', "o.a")),
|
||||
field_type: DataType::String(None),
|
||||
options: None,
|
||||
},
|
||||
StructField {
|
||||
field_name: Some(Ident::with_quote('"', "o.b")),
|
||||
field_type: DataType::String(None),
|
||||
options: None,
|
||||
},
|
||||
StructField {
|
||||
field_name: Some(Ident::with_quote('`', "x.y.z")),
|
||||
field_type: DataType::Float64,
|
||||
options: None,
|
||||
},
|
||||
],
|
||||
})
|
||||
.ok();
|
||||
assert_eq!(option, expected.as_ref());
|
||||
|
||||
let sql = r#"
|
||||
CREATE TABLE json_data (
|
||||
my_json JSON(format = "partial", unstructured_keys = ["k", "foo.bar", "a.b.c"]),
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
)"#;
|
||||
|
||||
@@ -40,6 +40,7 @@ use api::v1::SemanticType;
|
||||
use common_sql::default_constraint::parse_column_default_constraint;
|
||||
use common_time::timezone::Timezone;
|
||||
use datatypes::extension::json::{JsonExtensionType, JsonMetadata};
|
||||
use datatypes::json::JsonStructureSettings;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema};
|
||||
use datatypes::types::json_type::JsonNativeType;
|
||||
@@ -281,8 +282,17 @@ pub fn sql_data_type_to_concrete_data_type(
|
||||
}
|
||||
},
|
||||
SqlDataType::JSON => {
|
||||
let format = if column_extensions.json_datatype_options.is_some() {
|
||||
JsonFormat::Native(Box::new(JsonNativeType::Null))
|
||||
let format = if let Some(x) = column_extensions.build_json_structure_settings()? {
|
||||
if let Some(fields) = match x {
|
||||
JsonStructureSettings::Structured(fields) => fields,
|
||||
JsonStructureSettings::UnstructuredRaw => None,
|
||||
JsonStructureSettings::PartialUnstructuredByKey { fields, .. } => fields,
|
||||
} {
|
||||
let datatype = &ConcreteDataType::Struct(fields);
|
||||
JsonFormat::Native(Box::new(datatype.into()))
|
||||
} else {
|
||||
JsonFormat::Native(Box::new(JsonNativeType::Null))
|
||||
}
|
||||
} else {
|
||||
JsonFormat::Jsonb
|
||||
};
|
||||
|
||||
@@ -14,27 +14,30 @@
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::consts::FILE_ENGINE;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::json::JsonStructureSettings;
|
||||
use datatypes::schema::{
|
||||
FulltextOptions, SkippingIndexOptions, VectorDistanceMetric, VectorIndexEngineType,
|
||||
VectorIndexOptions,
|
||||
};
|
||||
use datatypes::types::StructType;
|
||||
use itertools::Itertools;
|
||||
use serde::Serialize;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue};
|
||||
use crate::error::{
|
||||
InvalidFlowQuerySnafu, InvalidSqlSnafu, Result, SetFulltextOptionSnafu,
|
||||
SetSkippingIndexOptionSnafu,
|
||||
InvalidFlowQuerySnafu, InvalidJsonStructureSettingSnafu, InvalidSqlSnafu, Result,
|
||||
SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu,
|
||||
};
|
||||
use crate::statements::OptionMap;
|
||||
use crate::statements::statement::Statement;
|
||||
use crate::statements::tql::Tql;
|
||||
use crate::statements::{OptionMap, sql_data_type_to_concrete_data_type};
|
||||
use crate::util::OptionValue;
|
||||
|
||||
const LINE_SEP: &str = ",\n";
|
||||
@@ -44,6 +47,7 @@ pub const VECTOR_OPT_DIM: &str = "dim";
|
||||
|
||||
/// Name of the JSON datatype option `unstructured_keys`.
pub const JSON_OPT_UNSTRUCTURED_KEYS: &str = "unstructured_keys";
|
||||
/// Name of the JSON datatype option `format`; its value is matched against
/// the `JSON_FORMAT_*` constants.
pub const JSON_OPT_FORMAT: &str = "format";
|
||||
/// Name of the JSON datatype option `fields`; its value is expected to be a
/// struct describing the JSON fields.
pub(crate) const JSON_OPT_FIELDS: &str = "fields";
|
||||
/// `format` value that selects `JsonStructureSettings::Structured`.
pub const JSON_FORMAT_FULL_STRUCTURED: &str = "structured";
|
||||
/// `format` value that selects `JsonStructureSettings::UnstructuredRaw`.
pub const JSON_FORMAT_RAW: &str = "raw";
|
||||
/// `format` value that selects `JsonStructureSettings::PartialUnstructuredByKey`.
pub const JSON_FORMAT_PARTIAL: &str = "partial";
|
||||
@@ -346,14 +350,51 @@ impl ColumnExtensions {
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let fields = if let Some(value) = options.value(JSON_OPT_FIELDS) {
|
||||
let fields = value
|
||||
.as_struct_fields()
|
||||
.context(InvalidJsonStructureSettingSnafu {
|
||||
reason: format!(r#"expect "{JSON_OPT_FIELDS}" a struct, actual: "{value}""#,),
|
||||
})?;
|
||||
let fields = fields
|
||||
.iter()
|
||||
.map(|field| {
|
||||
let name = field.field_name.as_ref().map(|x| x.value.clone()).context(
|
||||
InvalidJsonStructureSettingSnafu {
|
||||
reason: format!(r#"missing field name in "{field}""#),
|
||||
},
|
||||
)?;
|
||||
let datatype = sql_data_type_to_concrete_data_type(
|
||||
&field.field_type,
|
||||
&Default::default(),
|
||||
)?;
|
||||
Ok(datatypes::types::StructField::new(name, datatype, true))
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
Some(StructType::new(Arc::new(fields)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
options
|
||||
.get(JSON_OPT_FORMAT)
|
||||
.map(|format| match format {
|
||||
JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(None)),
|
||||
JSON_FORMAT_PARTIAL => Ok(JsonStructureSettings::PartialUnstructuredByKey {
|
||||
fields: None,
|
||||
unstructured_keys,
|
||||
}),
|
||||
JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(fields)),
|
||||
JSON_FORMAT_PARTIAL => {
|
||||
let fields = fields.map(|fields| {
|
||||
let mut fields = Arc::unwrap_or_clone(fields.fields());
|
||||
fields.push(datatypes::types::StructField::new(
|
||||
JsonStructureSettings::RAW_FIELD.to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
));
|
||||
StructType::new(Arc::new(fields))
|
||||
});
|
||||
Ok(JsonStructureSettings::PartialUnstructuredByKey {
|
||||
fields,
|
||||
unstructured_keys,
|
||||
})
|
||||
}
|
||||
JSON_FORMAT_RAW => Ok(JsonStructureSettings::UnstructuredRaw),
|
||||
_ => InvalidSqlSnafu {
|
||||
msg: format!("unknown JSON datatype 'format': {format}"),
|
||||
|
||||
@@ -19,7 +19,8 @@ use itertools::Itertools;
|
||||
use serde::Serialize;
|
||||
use snafu::ensure;
|
||||
use sqlparser::ast::{
|
||||
Array, Expr, Ident, ObjectName, SetExpr, SqlOption, TableFactor, Value, ValueWithSpan,
|
||||
Array, Expr, Ident, ObjectName, SetExpr, SqlOption, StructField, TableFactor, Value,
|
||||
ValueWithSpan,
|
||||
};
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
@@ -52,9 +53,12 @@ pub fn format_raw_object_name(name: &ObjectName) -> String {
|
||||
pub struct OptionValue(Expr);
|
||||
|
||||
impl OptionValue {
|
||||
fn try_new(expr: Expr) -> Result<Self> {
|
||||
pub(crate) fn try_new(expr: Expr) -> Result<Self> {
|
||||
ensure!(
|
||||
matches!(expr, Expr::Value(_) | Expr::Identifier(_) | Expr::Array(_)),
|
||||
matches!(
|
||||
expr,
|
||||
Expr::Value(_) | Expr::Identifier(_) | Expr::Array(_) | Expr::Struct { .. }
|
||||
),
|
||||
InvalidExprAsOptionValueSnafu {
|
||||
error: format!("{expr} not accepted")
|
||||
}
|
||||
@@ -106,6 +110,13 @@ impl OptionValue {
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Borrows the field definitions of the wrapped expression.
///
/// Returns `Some` with the `fields` slice when the inner expression is an
/// `Expr::Struct`, and `None` for every other expression kind.
pub(crate) fn as_struct_fields(&self) -> Option<&[StructField]> {
|
||||
match &self.0 {
|
||||
// Only the declared fields are exposed; the rest of the struct
// expression is ignored here.
Expr::Struct { fields, .. } => Some(fields),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for OptionValue {
|
||||
|
||||
Reference in New Issue
Block a user