refactor: explicitly define json struct to ingest jsonbench data (#7462)

ingest jsonbench data

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-12-24 15:30:22 +08:00
committed by GitHub
parent 2d9967b981
commit dc9f3a702e
31 changed files with 795 additions and 481 deletions

View File

@@ -40,4 +40,8 @@ impl Dialect for GreptimeDbDialect {
/// Dialect capability flag: always reports `true` for `GreptimeDbDialect`.
// NOTE(review): presumably this lets the parser accept a `FILTER (WHERE ...)`
// clause on aggregate calls — confirm against sqlparser's `Dialect` trait docs.
fn supports_filter_during_aggregation(&self) -> bool {
true
}
/// Dialect capability flag: always reports `true` for `GreptimeDbDialect`.
// NOTE(review): presumably this is what allows struct expressions such as
// `Struct<i Int, "o.a" String>` to be parsed as option values (e.g. the JSON
// `fields` option) — confirm against sqlparser's `Dialect` trait docs.
fn supports_struct_literal(&self) -> bool {
true
}
}

View File

@@ -215,6 +215,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid JSON structure setting, reason: {reason}"))]
InvalidJsonStructureSetting {
reason: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to serialize column default constraint"))]
SerializeColumnDefaultConstraint {
#[snafu(implicit)]
@@ -374,6 +381,7 @@ impl ErrorExt for Error {
InvalidColumnOption { .. }
| InvalidExprAsOptionValue { .. }
| InvalidJsonStructureSetting { .. }
| InvalidDatabaseName { .. }
| InvalidDatabaseOption { .. }
| ColumnTypeMismatch { .. }

View File

@@ -40,16 +40,17 @@ pub(super) fn parse_json_datatype_options(parser: &mut Parser<'_>) -> Result<Opt
#[cfg(test)]
mod tests {
use sqlparser::ast::DataType;
use sqlparser::ast::{DataType, Expr, Ident, StructField};
use crate::dialect::GreptimeDbDialect;
use crate::parser::{ParseOptions, ParserContext};
use crate::statements::OptionMap;
use crate::statements::create::{
Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FORMAT,
JSON_OPT_UNSTRUCTURED_KEYS,
Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FIELDS,
JSON_OPT_FORMAT, JSON_OPT_UNSTRUCTURED_KEYS,
};
use crate::statements::statement::Statement;
use crate::util::OptionValue;
#[test]
fn test_parse_json_datatype_options() {
@@ -77,6 +78,42 @@ mod tests {
let sql = r#"
CREATE TABLE json_data (
my_json JSON(format = "partial", fields = Struct<i Int, "o.a" String, "o.b" String, `x.y.z` Float64>),
ts TIMESTAMP TIME INDEX,
)"#;
let options = parse(sql).unwrap();
assert_eq!(options.len(), 2);
let option = options.value(JSON_OPT_FIELDS);
let expected = OptionValue::try_new(Expr::Struct {
values: vec![],
fields: vec![
StructField {
field_name: Some(Ident::new("i")),
field_type: DataType::Int(None),
options: None,
},
StructField {
field_name: Some(Ident::with_quote('"', "o.a")),
field_type: DataType::String(None),
options: None,
},
StructField {
field_name: Some(Ident::with_quote('"', "o.b")),
field_type: DataType::String(None),
options: None,
},
StructField {
field_name: Some(Ident::with_quote('`', "x.y.z")),
field_type: DataType::Float64,
options: None,
},
],
})
.ok();
assert_eq!(option, expected.as_ref());
let sql = r#"
CREATE TABLE json_data (
my_json JSON(format = "partial", unstructured_keys = ["k", "foo.bar", "a.b.c"]),
ts TIMESTAMP TIME INDEX,
)"#;

View File

@@ -40,6 +40,7 @@ use api::v1::SemanticType;
use common_sql::default_constraint::parse_column_default_constraint;
use common_time::timezone::Timezone;
use datatypes::extension::json::{JsonExtensionType, JsonMetadata};
use datatypes::json::JsonStructureSettings;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema};
use datatypes::types::json_type::JsonNativeType;
@@ -281,8 +282,17 @@ pub fn sql_data_type_to_concrete_data_type(
}
},
SqlDataType::JSON => {
let format = if column_extensions.json_datatype_options.is_some() {
JsonFormat::Native(Box::new(JsonNativeType::Null))
let format = if let Some(x) = column_extensions.build_json_structure_settings()? {
if let Some(fields) = match x {
JsonStructureSettings::Structured(fields) => fields,
JsonStructureSettings::UnstructuredRaw => None,
JsonStructureSettings::PartialUnstructuredByKey { fields, .. } => fields,
} {
let datatype = &ConcreteDataType::Struct(fields);
JsonFormat::Native(Box::new(datatype.into()))
} else {
JsonFormat::Native(Box::new(JsonNativeType::Null))
}
} else {
JsonFormat::Jsonb
};

View File

@@ -14,27 +14,30 @@
use std::collections::{HashMap, HashSet};
use std::fmt::{Display, Formatter};
use std::sync::Arc;
use common_catalog::consts::FILE_ENGINE;
use datatypes::data_type::ConcreteDataType;
use datatypes::json::JsonStructureSettings;
use datatypes::schema::{
FulltextOptions, SkippingIndexOptions, VectorDistanceMetric, VectorIndexEngineType,
VectorIndexOptions,
};
use datatypes::types::StructType;
use itertools::Itertools;
use serde::Serialize;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue};
use crate::error::{
InvalidFlowQuerySnafu, InvalidSqlSnafu, Result, SetFulltextOptionSnafu,
SetSkippingIndexOptionSnafu,
InvalidFlowQuerySnafu, InvalidJsonStructureSettingSnafu, InvalidSqlSnafu, Result,
SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu,
};
use crate::statements::OptionMap;
use crate::statements::statement::Statement;
use crate::statements::tql::Tql;
use crate::statements::{OptionMap, sql_data_type_to_concrete_data_type};
use crate::util::OptionValue;
const LINE_SEP: &str = ",\n";
@@ -44,6 +47,7 @@ pub const VECTOR_OPT_DIM: &str = "dim";
pub const JSON_OPT_UNSTRUCTURED_KEYS: &str = "unstructured_keys";
pub const JSON_OPT_FORMAT: &str = "format";
pub(crate) const JSON_OPT_FIELDS: &str = "fields";
pub const JSON_FORMAT_FULL_STRUCTURED: &str = "structured";
pub const JSON_FORMAT_RAW: &str = "raw";
pub const JSON_FORMAT_PARTIAL: &str = "partial";
@@ -346,14 +350,51 @@ impl ColumnExtensions {
})
.unwrap_or_default();
let fields = if let Some(value) = options.value(JSON_OPT_FIELDS) {
let fields = value
.as_struct_fields()
.context(InvalidJsonStructureSettingSnafu {
reason: format!(r#"expect "{JSON_OPT_FIELDS}" a struct, actual: "{value}""#,),
})?;
let fields = fields
.iter()
.map(|field| {
let name = field.field_name.as_ref().map(|x| x.value.clone()).context(
InvalidJsonStructureSettingSnafu {
reason: format!(r#"missing field name in "{field}""#),
},
)?;
let datatype = sql_data_type_to_concrete_data_type(
&field.field_type,
&Default::default(),
)?;
Ok(datatypes::types::StructField::new(name, datatype, true))
})
.collect::<Result<_>>()?;
Some(StructType::new(Arc::new(fields)))
} else {
None
};
options
.get(JSON_OPT_FORMAT)
.map(|format| match format {
JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(None)),
JSON_FORMAT_PARTIAL => Ok(JsonStructureSettings::PartialUnstructuredByKey {
fields: None,
unstructured_keys,
}),
JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(fields)),
JSON_FORMAT_PARTIAL => {
let fields = fields.map(|fields| {
let mut fields = Arc::unwrap_or_clone(fields.fields());
fields.push(datatypes::types::StructField::new(
JsonStructureSettings::RAW_FIELD.to_string(),
ConcreteDataType::string_datatype(),
true,
));
StructType::new(Arc::new(fields))
});
Ok(JsonStructureSettings::PartialUnstructuredByKey {
fields,
unstructured_keys,
})
}
JSON_FORMAT_RAW => Ok(JsonStructureSettings::UnstructuredRaw),
_ => InvalidSqlSnafu {
msg: format!("unknown JSON datatype 'format': {format}"),

View File

@@ -19,7 +19,8 @@ use itertools::Itertools;
use serde::Serialize;
use snafu::ensure;
use sqlparser::ast::{
Array, Expr, Ident, ObjectName, SetExpr, SqlOption, TableFactor, Value, ValueWithSpan,
Array, Expr, Ident, ObjectName, SetExpr, SqlOption, StructField, TableFactor, Value,
ValueWithSpan,
};
use sqlparser_derive::{Visit, VisitMut};
@@ -52,9 +53,12 @@ pub fn format_raw_object_name(name: &ObjectName) -> String {
pub struct OptionValue(Expr);
impl OptionValue {
fn try_new(expr: Expr) -> Result<Self> {
pub(crate) fn try_new(expr: Expr) -> Result<Self> {
ensure!(
matches!(expr, Expr::Value(_) | Expr::Identifier(_) | Expr::Array(_)),
matches!(
expr,
Expr::Value(_) | Expr::Identifier(_) | Expr::Array(_) | Expr::Struct { .. }
),
InvalidExprAsOptionValueSnafu {
error: format!("{expr} not accepted")
}
@@ -106,6 +110,13 @@ impl OptionValue {
_ => None,
}
}
/// Borrows the field definitions when the wrapped expression is an
/// `Expr::Struct`.
///
/// Returns `None` for every other kind of expression.
pub(crate) fn as_struct_fields(&self) -> Option<&[StructField]> {
    if let Expr::Struct { fields, .. } = &self.0 {
        Some(fields.as_slice())
    } else {
        None
    }
}
}
impl From<String> for OptionValue {