mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-14 20:10:37 +00:00
feat: add json data type (#4619)
* feat: add json type and vector * fix: allow to create and insert json data * feat: udf to query json as string * refactor: remove JsonbValue and JsonVector * feat: show json value as strings * chore: make ci happy * test: add unit test and sqlness test * refactor: use binary as grpc value of json * fix: use non-preserve-order jsonb * test: revert changed test * refactor: change udf get_by_path to jq * chore: make ci happy * fix: distinguish binary and json in proto * chore: delete udf for future pr * refactor: remove Value(Json) * chore: follow review comments * test: some tests and checks * test: fix unit tests * chore: follow review comments * chore: corresponding changes to proto * fix: change grpc and pgsql server behavior alongside with sqlness/crud tests * chore: follow review comments * feat: udf of conversions between json and strings, used for grpc server * refactor: rename to_string to json_to_string * test: add more sqlness test for json * chore: thanks for review :) * Apply suggestions from code review --------- Co-authored-by: Weny Xu <wenymedia@gmail.com>
This commit is contained in:
29
Cargo.lock
generated
29
Cargo.lock
generated
@@ -1952,6 +1952,7 @@ dependencies = [
|
||||
"datatypes",
|
||||
"geohash",
|
||||
"h3o",
|
||||
"jsonb",
|
||||
"num",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
@@ -3167,6 +3168,7 @@ dependencies = [
|
||||
"datafusion-common",
|
||||
"enum_dispatch",
|
||||
"greptime-proto",
|
||||
"jsonb",
|
||||
"num",
|
||||
"num-traits",
|
||||
"ordered-float 3.9.2",
|
||||
@@ -3699,6 +3701,12 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "fast-float"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
|
||||
|
||||
[[package]]
|
||||
name = "fastdivide"
|
||||
version = "0.4.1"
|
||||
@@ -4303,7 +4311,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=157cfdb52709e489cf1f3ce8e3042ed4ee8a524a#157cfdb52709e489cf1f3ce8e3042ed4ee8a524a"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=973f49cde88a582fb65755cc572ebcf6fb93ccf7#973f49cde88a582fb65755cc572ebcf6fb93ccf7"
|
||||
dependencies = [
|
||||
"prost 0.12.6",
|
||||
"serde",
|
||||
@@ -5410,6 +5418,21 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonb"
|
||||
version = "0.4.1"
|
||||
source = "git+https://github.com/CookiePieWw/jsonb.git?rev=d0166c130fce903bf6c58643417a3173a6172d31#d0166c130fce903bf6c58643417a3173a6172d31"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"fast-float",
|
||||
"itoa",
|
||||
"nom",
|
||||
"ordered-float 4.2.0",
|
||||
"rand",
|
||||
"ryu",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jsonpath-rust"
|
||||
version = "0.5.1"
|
||||
@@ -8063,6 +8086,8 @@ dependencies = [
|
||||
"chrono",
|
||||
"fallible-iterator",
|
||||
"postgres-protocol",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -10401,6 +10426,7 @@ dependencies = [
|
||||
"hyper 0.14.29",
|
||||
"influxdb_line_protocol",
|
||||
"itertools 0.10.5",
|
||||
"jsonb",
|
||||
"lazy_static",
|
||||
"mime_guess",
|
||||
"mysql_async",
|
||||
@@ -10780,6 +10806,7 @@ dependencies = [
|
||||
"hex",
|
||||
"iso8601",
|
||||
"itertools 0.10.5",
|
||||
"jsonb",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
"serde_json",
|
||||
|
||||
@@ -120,10 +120,11 @@ etcd-client = { version = "0.13" }
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
|
||||
lazy_static = "1.4"
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
|
||||
mockall = "0.11.4"
|
||||
|
||||
@@ -42,7 +42,8 @@ use greptime_proto::v1::greptime_request::Request;
|
||||
use greptime_proto::v1::query_request::Query;
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{
|
||||
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, QueryRequest, Row, SemanticType,
|
||||
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, QueryRequest,
|
||||
Row, SemanticType,
|
||||
};
|
||||
use paste::paste;
|
||||
use snafu::prelude::*;
|
||||
@@ -103,7 +104,17 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ColumnDataType::Uint64 => ConcreteDataType::uint64_datatype(),
|
||||
ColumnDataType::Float32 => ConcreteDataType::float32_datatype(),
|
||||
ColumnDataType::Float64 => ConcreteDataType::float64_datatype(),
|
||||
ColumnDataType::Binary => ConcreteDataType::binary_datatype(),
|
||||
ColumnDataType::Binary => {
|
||||
if let Some(TypeExt::JsonType(_)) = datatype_wrapper
|
||||
.datatype_ext
|
||||
.as_ref()
|
||||
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref())
|
||||
{
|
||||
ConcreteDataType::json_datatype()
|
||||
} else {
|
||||
ConcreteDataType::binary_datatype()
|
||||
}
|
||||
}
|
||||
ColumnDataType::String => ConcreteDataType::string_datatype(),
|
||||
ColumnDataType::Date => ConcreteDataType::date_datatype(),
|
||||
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
|
||||
@@ -236,7 +247,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
ConcreteDataType::UInt64(_) => ColumnDataType::Uint64,
|
||||
ConcreteDataType::Float32(_) => ColumnDataType::Float32,
|
||||
ConcreteDataType::Float64(_) => ColumnDataType::Float64,
|
||||
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::String(_) => ColumnDataType::String,
|
||||
ConcreteDataType::Date(_) => ColumnDataType::Date,
|
||||
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
|
||||
@@ -276,6 +287,16 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
})),
|
||||
})
|
||||
}
|
||||
ColumnDataType::Binary => {
|
||||
if datatype == ConcreteDataType::json_datatype() {
|
||||
// Json is encoded as binary in proto; the extension marks that this binary column actually holds JSON.
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
Ok(Self {
|
||||
@@ -649,7 +670,8 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
| ConcreteDataType::Duration(_) => {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Json(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
@@ -813,7 +835,8 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
| ConcreteDataType::Duration(_) => {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Json(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
@@ -831,7 +854,13 @@ pub fn is_column_type_value_eq(
|
||||
expect_type: &ConcreteDataType,
|
||||
) -> bool {
|
||||
ColumnDataTypeWrapper::try_new(type_value, type_extension)
|
||||
.map(|wrapper| ConcreteDataType::from(wrapper) == *expect_type)
|
||||
.map(|wrapper| {
|
||||
let datatype = ConcreteDataType::from(wrapper);
|
||||
(datatype == *expect_type)
|
||||
// The Json type leverages the binary type in pb, so this is valid.
|
||||
|| (datatype == ConcreteDataType::binary_datatype()
|
||||
&& *expect_type == ConcreteDataType::json_datatype())
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
geohash = { version = "0.13", optional = true }
|
||||
h3o = { version = "0.6", optional = true }
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
once_cell.workspace = true
|
||||
|
||||
@@ -22,6 +22,7 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
|
||||
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
|
||||
use crate::scalars::date::DateFunction;
|
||||
use crate::scalars::expression::ExpressionFunction;
|
||||
use crate::scalars::json::JsonFunction;
|
||||
use crate::scalars::matches::MatchesFunction;
|
||||
use crate::scalars::math::MathFunction;
|
||||
use crate::scalars::numpy::NumpyFunction;
|
||||
@@ -116,6 +117,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
SystemFunction::register(&function_registry);
|
||||
TableFunction::register(&function_registry);
|
||||
|
||||
// Json related functions
|
||||
JsonFunction::register(&function_registry);
|
||||
|
||||
// Geo functions
|
||||
#[cfg(feature = "geo")]
|
||||
crate::scalars::geo::GeoFunctions::register(&function_registry);
|
||||
|
||||
@@ -17,9 +17,11 @@ pub(crate) mod date;
|
||||
pub mod expression;
|
||||
#[cfg(feature = "geo")]
|
||||
pub mod geo;
|
||||
pub mod json;
|
||||
pub mod matches;
|
||||
pub mod math;
|
||||
pub mod numpy;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test;
|
||||
pub(crate) mod timestamp;
|
||||
|
||||
31
src/common/function/src/scalars/json.rs
Normal file
31
src/common/function/src/scalars/json.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
mod json_to_string;
|
||||
mod to_json;
|
||||
|
||||
use json_to_string::JsonToStringFunction;
|
||||
use to_json::ToJsonFunction;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
pub(crate) struct JsonFunction;
|
||||
|
||||
impl JsonFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(JsonToStringFunction));
|
||||
registry.register(Arc::new(ToJsonFunction));
|
||||
}
|
||||
}
|
||||
174
src/common/function/src/scalars/json/json_to_string.rs
Normal file
174
src/common/function/src/scalars/json/json_to_string.rs
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct JsonToStringFunction;
|
||||
|
||||
const NAME: &str = "json_to_string";
|
||||
|
||||
impl Function for JsonToStringFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::json_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let jsons = &columns[0];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = StringVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let result = match json {
|
||||
Ok(Some(json)) => match jsonb::from_slice(json) {
|
||||
Ok(json) => {
|
||||
let json = json.to_string();
|
||||
Some(json)
|
||||
}
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Illegal json binary: {:?}", json),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonToStringFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "JSON_TO_STRING")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::BinaryVector;

    use super::*;

    /// Checks the metadata of `json_to_string`, the text it produces for valid JSONB,
    /// and that bytes which are not JSONB are rejected.
    #[test]
    fn test_json_to_string_function() {
        let json_to_string = JsonToStringFunction;

        assert_eq!("json_to_string", json_to_string.name());
        assert_eq!(
            ConcreteDataType::string_datatype(),
            json_to_string
                .return_type(&[ConcreteDataType::json_datatype()])
                .unwrap()
        );

        assert!(matches!(json_to_string.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::json_datatype()]
        ));

        let json_strings = [
            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
        ];

        // Encode the fixtures as JSONB; these are the function's inputs.
        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();

        let json_vector = BinaryVector::from_vec(jsonbs);
        let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
        let vector = json_to_string
            .eval(FunctionContext::default(), &args)
            .unwrap();

        assert_eq!(3, vector.len());
        for (i, gt) in json_strings.iter().enumerate() {
            let result = vector.get_ref(i);
            let result = result.as_string().unwrap().unwrap();
            // The rendered text carries no whitespace, so strip it from the fixture
            // before comparing.
            assert_eq!(gt.replace(" ", ""), result);
        }

        // Bytes that are not JSONB must surface as an error.
        let invalid_jsonb = vec![b"invalid json"];
        let invalid_json_vector = BinaryVector::from_vec(invalid_jsonb);
        let args: Vec<VectorRef> = vec![Arc::new(invalid_json_vector)];
        let vector = json_to_string.eval(FunctionContext::default(), &args);
        assert!(vector.is_err());
    }
}
|
||||
165
src/common/function/src/scalars/json/to_json.rs
Normal file
165
src/common/function/src/scalars/json/to_json.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Parses the `String` into `JSONB`.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ToJsonFunction;
|
||||
|
||||
const NAME: &str = "to_json";
|
||||
|
||||
impl Function for ToJsonFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::json_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let json_strings = &columns[0];
|
||||
|
||||
let size = json_strings.len();
|
||||
let datatype = json_strings.data_type();
|
||||
let mut results = BinaryVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
ConcreteDataType::String(_) => {
|
||||
for i in 0..size {
|
||||
let json_string = json_strings.get_ref(i);
|
||||
|
||||
let json_string = json_string.as_string();
|
||||
let result = match json_string {
|
||||
Ok(Some(json_string)) => match jsonb::parse_value(json_string.as_bytes()) {
|
||||
Ok(json) => Some(json.to_vec()),
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"Cannot convert the string to json, have: {}",
|
||||
json_string
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ToJsonFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TO_JSON")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::StringVector;

    use super::*;

    /// Checks the metadata of `to_json` and that evaluating it yields the same JSONB
    /// bytes as encoding each input string directly.
    #[test]
    fn test_to_json_function() {
        let to_json = ToJsonFunction;

        assert_eq!("to_json", to_json.name());
        // The declared input of `to_json` is a string, so query the return type with it.
        assert_eq!(
            ConcreteDataType::json_datatype(),
            to_json
                .return_type(&[ConcreteDataType::string_datatype()])
                .unwrap()
        );

        assert!(matches!(to_json.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::string_datatype()]
        ));

        let json_strings = [
            r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
            r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
            r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
        ];

        // Expected output: each fixture encoded as JSONB.
        let jsonbs = json_strings
            .iter()
            .map(|s| {
                let value = jsonb::parse_value(s.as_bytes()).unwrap();
                value.to_vec()
            })
            .collect::<Vec<_>>();

        let json_string_vector = StringVector::from_vec(json_strings.to_vec());
        let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
        let vector = to_json.eval(FunctionContext::default(), &args).unwrap();

        assert_eq!(3, vector.len());
        for (i, gt) in jsonbs.iter().enumerate() {
            let result = vector.get_ref(i);
            let result = result.as_binary().unwrap().unwrap();
            // The produced bytes must match the directly encoded JSONB exactly.
            assert_eq!(gt, result);
        }
    }
}
|
||||
@@ -70,7 +70,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
|
||||
return Ok(vals);
|
||||
},
|
||||
)+
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) | ConcreteDataType::Json(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ common-time.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
greptime-proto.workspace = true
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
ordered-float = { version = "3.0", features = ["serde"] }
|
||||
|
||||
@@ -33,8 +33,8 @@ use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
|
||||
DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
|
||||
DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, ListType,
|
||||
NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
|
||||
ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
@@ -81,6 +81,9 @@ pub enum ConcreteDataType {
|
||||
// Compound types:
|
||||
List(ListType),
|
||||
Dictionary(DictionaryType),
|
||||
|
||||
// JSON type:
|
||||
Json(JsonType),
|
||||
}
|
||||
|
||||
impl fmt::Display for ConcreteDataType {
|
||||
@@ -128,6 +131,7 @@ impl fmt::Display for ConcreteDataType {
|
||||
ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::List(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
|
||||
ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -162,6 +166,7 @@ impl ConcreteDataType {
|
||||
| ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Decimal128(_)
|
||||
| ConcreteDataType::Binary(_)
|
||||
| ConcreteDataType::Json(_)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -216,6 +221,10 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Decimal128(_))
|
||||
}
|
||||
|
||||
/// Returns `true` when this data type is the `Json` variant.
pub fn is_json(&self) -> bool {
|
||||
matches!(self, ConcreteDataType::Json(_))
|
||||
}
|
||||
|
||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
@@ -404,7 +413,7 @@ macro_rules! impl_new_concrete_type_functions {
|
||||
|
||||
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, Date, DateTime, String
|
||||
Binary, Date, DateTime, String, Json
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
|
||||
@@ -25,6 +25,7 @@ use datafusion_common::DFSchemaRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
|
||||
use crate::prelude::DataType;
|
||||
pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY,
|
||||
TIME_INDEX_KEY,
|
||||
@@ -34,6 +35,8 @@ pub use crate::schema::raw::RawSchema;
|
||||
|
||||
/// Key used to store version number of the schema in metadata.
|
||||
pub const VERSION_KEY: &str = "greptime:version";
|
||||
/// Key used to store actual column type in field metadata.
|
||||
pub const TYPE_KEY: &str = "greptime:type";
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
@@ -256,7 +259,13 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
|
||||
if column_schema.is_time_index() && timestamp_index.is_none() {
|
||||
timestamp_index = Some(index);
|
||||
}
|
||||
let field = Field::try_from(column_schema)?;
|
||||
let mut field = Field::try_from(column_schema)?;
|
||||
|
||||
// A Json column is represented the same as a binary column in Arrow, so mark it in the field metadata.
|
||||
if column_schema.data_type.is_json() {
|
||||
let metadata = HashMap::from([(TYPE_KEY.to_string(), column_schema.data_type.name())]);
|
||||
field = field.with_metadata(metadata);
|
||||
}
|
||||
fields.push(field);
|
||||
ensure!(
|
||||
name_to_index
|
||||
|
||||
@@ -22,6 +22,8 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
use crate::schema::TYPE_KEY;
|
||||
use crate::types::JSON_TYPE_NAME;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
@@ -268,7 +270,14 @@ impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut data_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
// Override the data type if it is specified in the metadata.
|
||||
if field.metadata().contains_key(TYPE_KEY) {
|
||||
data_type = match field.metadata().get(TYPE_KEY).unwrap().as_str() {
|
||||
JSON_TYPE_NAME => ConcreteDataType::json_datatype(),
|
||||
_ => data_type,
|
||||
};
|
||||
}
|
||||
let mut metadata = field.metadata().clone();
|
||||
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => {
|
||||
@@ -528,4 +537,32 @@ mod tests {
|
||||
assert_eq!(formatted_int8, "test_column_1 Int8 null");
|
||||
assert_eq!(formatted_int32, "test_column_2 Int32 not null");
|
||||
}
|
||||
|
||||
/// Converting an Arrow `Field` into a `ColumnSchema` keeps the Arrow-derived type for a
/// plain field and switches to the JSON type when the field metadata carries `TYPE_KEY`.
#[test]
fn test_from_field_to_column_schema() {
    // A plain Int32 field: nothing in the metadata, so the Arrow type is used as is.
    let plain_field = Field::new("test", ArrowDataType::Int32, true);
    let plain_schema = ColumnSchema::try_from(&plain_field).unwrap();
    assert_eq!("test", plain_schema.name);
    assert_eq!(ConcreteDataType::int32_datatype(), plain_schema.data_type);
    assert!(plain_schema.is_nullable);
    assert!(!plain_schema.is_time_index);
    assert!(plain_schema.default_constraint.is_none());
    assert!(plain_schema.metadata.is_empty());

    // A Binary field tagged with the JSON type name is read back as a JSON column.
    let type_marker = Metadata::from([(
        TYPE_KEY.to_string(),
        ConcreteDataType::json_datatype().name(),
    )]);
    let json_field = Field::new("test", ArrowDataType::Binary, true).with_metadata(type_marker);
    let json_schema = ColumnSchema::try_from(&json_field).unwrap();
    assert_eq!("test", json_schema.name);
    assert_eq!(ConcreteDataType::json_datatype(), json_schema.data_type);
    assert!(json_schema.is_nullable);
    assert!(!json_schema.is_time_index);
    assert!(json_schema.default_constraint.is_none());
    assert_eq!(
        json_schema.metadata.get(TYPE_KEY).unwrap(),
        &ConcreteDataType::json_datatype().name()
    );
}
|
||||
}
|
||||
|
||||
@@ -68,6 +68,8 @@ pub enum LogicalTypeId {
|
||||
|
||||
List,
|
||||
Dictionary,
|
||||
|
||||
Json,
|
||||
}
|
||||
|
||||
impl LogicalTypeId {
|
||||
@@ -126,6 +128,7 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::DurationMicrosecond => ConcreteDataType::duration_microsecond_datatype(),
|
||||
LogicalTypeId::DurationNanosecond => ConcreteDataType::duration_nanosecond_datatype(),
|
||||
LogicalTypeId::Decimal128 => ConcreteDataType::decimal128_default_datatype(),
|
||||
LogicalTypeId::Json => ConcreteDataType::json_datatype(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ mod decimal_type;
|
||||
mod dictionary_type;
|
||||
mod duration_type;
|
||||
mod interval_type;
|
||||
mod json_type;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_type;
|
||||
@@ -42,6 +43,7 @@ pub use duration_type::{
|
||||
pub use interval_type::{
|
||||
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType,
|
||||
};
|
||||
pub use json_type::{JsonType, JSON_TYPE_NAME};
|
||||
pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_type::{
|
||||
|
||||
67
src/datatypes/src/types/json_type.rs
Normal file
67
src/datatypes/src/types/json_type.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
pub const JSON_TYPE_NAME: &str = "Json";
|
||||
|
||||
/// JsonType is a data type for JSON data. It is stored as binary data of jsonb format.
|
||||
/// It utilizes current binary value and vector implementation.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct JsonType;
|
||||
|
||||
impl JsonType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for JsonType {
|
||||
fn name(&self) -> String {
|
||||
JSON_TYPE_NAME.to_string()
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Json
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Bytes::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Binary
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn try_cast(&self, from: Value) -> Option<Value> {
|
||||
match from {
|
||||
Value::Binary(v) => Some(Value::Binary(v)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -342,7 +342,8 @@ impl Value {
|
||||
let value_type_id = self.logical_type_id();
|
||||
let output_type_id = output_type.logical_type_id();
|
||||
ensure!(
|
||||
output_type_id == value_type_id || self.is_null(),
|
||||
// Json type leverages Value(Binary) for storage.
|
||||
output_type_id == value_type_id || self.is_null() || (output_type_id == LogicalTypeId::Json && value_type_id == LogicalTypeId::Binary),
|
||||
error::ToScalarValueSnafu {
|
||||
reason: format!(
|
||||
"expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}",
|
||||
@@ -484,7 +485,7 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
|
||||
ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
|
||||
ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
|
||||
ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
|
||||
ConcreteDataType::Binary(_) => ScalarValue::Binary(None),
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ScalarValue::Binary(None),
|
||||
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
|
||||
ConcreteDataType::Date(_) => ScalarValue::Date32(None),
|
||||
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
|
||||
@@ -1994,6 +1995,10 @@ mod tests {
|
||||
&ConcreteDataType::duration_nanosecond_datatype(),
|
||||
&Value::Duration(Duration::new_nanosecond(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::decimal128_datatype(38, 10),
|
||||
&Value::Decimal128(Decimal128::new(1, 38, 10)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2178,6 +2183,14 @@ mod tests {
|
||||
ValueRef::List(ListValueRef::Ref { val: &list }),
|
||||
Value::List(list.clone()).as_value_ref()
|
||||
);
|
||||
|
||||
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
assert_eq!(
|
||||
ValueRef::Binary(jsonb_value.clone().as_slice()),
|
||||
Value::Binary(jsonb_value.into()).as_value_ref()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2391,6 +2404,16 @@ mod tests {
|
||||
.try_to_scalar_value(&ConcreteDataType::binary_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(Some(jsonb_value.clone())),
|
||||
Value::Binary(jsonb_value.into())
|
||||
.try_to_scalar_value(&ConcreteDataType::json_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2523,6 +2546,12 @@ mod tests {
|
||||
.try_to_scalar_value(&ConcreteDataType::duration_nanosecond_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Binary(None),
|
||||
Value::Null
|
||||
.try_to_scalar_value(&ConcreteDataType::json_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -80,7 +80,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
match lhs.data_type() {
|
||||
Null(_) => true,
|
||||
Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs),
|
||||
Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
Binary(_) | Json(_) => is_vector_eq!(BinaryVector, lhs, rhs),
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
|
||||
@@ -68,7 +68,7 @@ impl SortField {
|
||||
ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
|
||||
ConcreteDataType::Float32(_) => 5,
|
||||
ConcreteDataType::Float64(_) => 9,
|
||||
ConcreteDataType::Binary(_) => 11,
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => 11,
|
||||
ConcreteDataType::String(_) => 11, // a non-empty string takes at least 11 bytes.
|
||||
ConcreteDataType::Date(_) => 5,
|
||||
ConcreteDataType::DateTime(_) => 9,
|
||||
@@ -146,7 +146,8 @@ impl SortField {
|
||||
Time, time,
|
||||
Interval, interval,
|
||||
Duration, duration,
|
||||
Decimal128, decimal128
|
||||
Decimal128, decimal128,
|
||||
Json, binary
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -169,7 +170,7 @@ impl SortField {
|
||||
Ok(Value::from(Option::<$f>::deserialize(deserializer).context(error::DeserializeFieldSnafu)?))
|
||||
}
|
||||
)*
|
||||
ConcreteDataType::Binary(_) => Ok(Value::from(
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => Ok(Value::from(
|
||||
Option::<Vec<u8>>::deserialize(deserializer)
|
||||
.context(error::DeserializeFieldSnafu)?
|
||||
.map(Bytes::from),
|
||||
@@ -237,7 +238,7 @@ impl SortField {
|
||||
ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
|
||||
ConcreteDataType::Float32(_) => 5,
|
||||
ConcreteDataType::Float64(_) => 9,
|
||||
ConcreteDataType::Binary(_) => {
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => {
|
||||
// Now the encoder encodes binary as a list of bytes so we can't use
|
||||
// skip bytes.
|
||||
let pos_before = deserializer.position();
|
||||
|
||||
@@ -156,20 +156,22 @@ impl MergeScanExec {
|
||||
query_ctx: QueryContextRef,
|
||||
target_partition: usize,
|
||||
) -> Result<Self> {
|
||||
let arrow_schema_without_metadata = Self::arrow_schema_without_metadata(arrow_schema);
|
||||
// TODO(CookiePieWw): Initially we removed the metadata from the schema in #2000, but we have to
|
||||
// keep it for #4619 to identify json type in src/datatypes/src/schema/column_schema.rs.
|
||||
// Reconsider if it's possible to remove it.
|
||||
let arrow_schema = Arc::new(arrow_schema.clone());
|
||||
let properties = PlanProperties::new(
|
||||
EquivalenceProperties::new(arrow_schema_without_metadata.clone()),
|
||||
EquivalenceProperties::new(arrow_schema.clone()),
|
||||
Partitioning::UnknownPartitioning(target_partition),
|
||||
ExecutionMode::Bounded,
|
||||
);
|
||||
let schema_without_metadata =
|
||||
Self::arrow_schema_to_schema(arrow_schema_without_metadata.clone())?;
|
||||
let schema = Self::arrow_schema_to_schema(arrow_schema.clone())?;
|
||||
Ok(Self {
|
||||
table,
|
||||
regions,
|
||||
plan,
|
||||
schema: schema_without_metadata,
|
||||
arrow_schema: arrow_schema_without_metadata,
|
||||
schema,
|
||||
arrow_schema,
|
||||
region_query_handler,
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
sub_stage_metrics: Arc::default(),
|
||||
@@ -288,20 +290,6 @@ impl MergeScanExec {
|
||||
}))
|
||||
}
|
||||
|
||||
fn arrow_schema_without_metadata(arrow_schema: &ArrowSchema) -> ArrowSchemaRef {
|
||||
Arc::new(ArrowSchema::new(
|
||||
arrow_schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| {
|
||||
let field = field.as_ref().clone();
|
||||
let field_without_metadata = field.with_metadata(Default::default());
|
||||
Arc::new(field_without_metadata)
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
))
|
||||
}
|
||||
|
||||
fn arrow_schema_to_schema(arrow_schema: ArrowSchemaRef) -> Result<SchemaRef> {
|
||||
let schema = Schema::try_from(arrow_schema).context(ConvertSchemaSnafu)?;
|
||||
Ok(Arc::new(schema))
|
||||
|
||||
@@ -58,6 +58,7 @@ humantime-serde.workspace = true
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", branch = "feat/line-protocol" }
|
||||
itertools.workspace = true
|
||||
jsonb.workspace = true
|
||||
lazy_static.workspace = true
|
||||
mime_guess = "2.0"
|
||||
notify.workspace = true
|
||||
@@ -70,7 +71,7 @@ parking_lot = "0.12"
|
||||
pgwire = "0.20"
|
||||
pin-project = "1.0"
|
||||
pipeline.workspace = true
|
||||
postgres-types = { version = "0.2", features = ["with-chrono-0_4"] }
|
||||
postgres-types = { version = "0.2", features = ["with-chrono-0_4", "with-serde_json-1"] }
|
||||
pprof = { version = "0.13", features = [
|
||||
"flamegraph",
|
||||
"prost-codec",
|
||||
|
||||
@@ -168,6 +168,7 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
|
||||
&mut row_writer,
|
||||
&record_batch,
|
||||
query_context.clone(),
|
||||
&column_def,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
@@ -191,9 +192,10 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
|
||||
row_writer: &mut RowWriter<'_, W>,
|
||||
recordbatch: &RecordBatch,
|
||||
query_context: QueryContextRef,
|
||||
column_def: &[Column],
|
||||
) -> Result<()> {
|
||||
for row in recordbatch.rows() {
|
||||
for value in row.into_iter() {
|
||||
for (value, column) in row.into_iter().zip(column_def.iter()) {
|
||||
match value {
|
||||
Value::Null => row_writer.write_col(None::<u8>)?,
|
||||
Value::Boolean(v) => row_writer.write_col(v as i8)?,
|
||||
@@ -208,7 +210,14 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
|
||||
Value::Float32(v) => row_writer.write_col(v.0)?,
|
||||
Value::Float64(v) => row_writer.write_col(v.0)?,
|
||||
Value::String(v) => row_writer.write_col(v.as_utf8())?,
|
||||
Value::Binary(v) => row_writer.write_col(v.deref())?,
|
||||
Value::Binary(v) => match column.coltype {
|
||||
ColumnType::MYSQL_TYPE_JSON => {
|
||||
row_writer.write_col(jsonb::to_string(&v))?;
|
||||
}
|
||||
_ => {
|
||||
row_writer.write_col(v.deref())?;
|
||||
}
|
||||
},
|
||||
Value::Date(v) => row_writer.write_col(v.to_chrono_date())?,
|
||||
// convert datetime and timestamp to timezone of current connection
|
||||
Value::DateTime(v) => row_writer.write_col(
|
||||
@@ -281,6 +290,7 @@ pub(crate) fn create_mysql_column(
|
||||
ConcreteDataType::Interval(_) => Ok(ColumnType::MYSQL_TYPE_VARCHAR),
|
||||
ConcreteDataType::Duration(_) => Ok(ColumnType::MYSQL_TYPE_TIME),
|
||||
ConcreteDataType::Decimal128(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL),
|
||||
ConcreteDataType::Json(_) => Ok(ColumnType::MYSQL_TYPE_JSON),
|
||||
_ => error::UnsupportedDataTypeSnafu {
|
||||
data_type,
|
||||
reason: "not implemented",
|
||||
|
||||
@@ -150,8 +150,8 @@ where
|
||||
.map(move |row| {
|
||||
row.and_then(|row| {
|
||||
let mut encoder = DataRowEncoder::new(pg_schema_ref.clone());
|
||||
for value in row.iter() {
|
||||
encode_value(&query_ctx, value, &mut encoder)?;
|
||||
for (value, column) in row.iter().zip(schema.column_schemas()) {
|
||||
encode_value(&query_ctx, value, &mut encoder, &column.data_type)?;
|
||||
}
|
||||
encoder.finish()
|
||||
})
|
||||
|
||||
@@ -62,6 +62,7 @@ pub(super) fn encode_value(
|
||||
query_ctx: &QueryContextRef,
|
||||
value: &Value,
|
||||
builder: &mut DataRowEncoder,
|
||||
datatype: &ConcreteDataType,
|
||||
) -> PgWireResult<()> {
|
||||
match value {
|
||||
Value::Null => builder.encode_field(&None::<&i8>),
|
||||
@@ -77,13 +78,18 @@ pub(super) fn encode_value(
|
||||
Value::Float32(v) => builder.encode_field(&v.0),
|
||||
Value::Float64(v) => builder.encode_field(&v.0),
|
||||
Value::String(v) => builder.encode_field(&v.as_utf8()),
|
||||
Value::Binary(v) => {
|
||||
let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output();
|
||||
match *bytea_output {
|
||||
PGByteaOutputValue::ESCAPE => builder.encode_field(&EscapeOutputBytea(v.deref())),
|
||||
PGByteaOutputValue::HEX => builder.encode_field(&HexOutputBytea(v.deref())),
|
||||
Value::Binary(v) => match datatype {
|
||||
ConcreteDataType::Json(_) => builder.encode_field(&jsonb::to_string(v)),
|
||||
_ => {
|
||||
let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output();
|
||||
match *bytea_output {
|
||||
PGByteaOutputValue::ESCAPE => {
|
||||
builder.encode_field(&EscapeOutputBytea(v.deref()))
|
||||
}
|
||||
PGByteaOutputValue::HEX => builder.encode_field(&HexOutputBytea(v.deref())),
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Value::Date(v) => {
|
||||
if let Some(date) = v.to_chrono_date() {
|
||||
let (style, order) = *query_ctx.configuration_parameter().pg_datetime_style();
|
||||
@@ -154,6 +160,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result<Type> {
|
||||
&ConcreteDataType::Time(_) => Ok(Type::TIME),
|
||||
&ConcreteDataType::Interval(_) => Ok(Type::INTERVAL),
|
||||
&ConcreteDataType::Decimal128(_) => Ok(Type::NUMERIC),
|
||||
&ConcreteDataType::Json(_) => Ok(Type::JSON),
|
||||
&ConcreteDataType::Duration(_)
|
||||
| &ConcreteDataType::List(_)
|
||||
| &ConcreteDataType::Dictionary(_) => server_error::UnsupportedDataTypeSnafu {
|
||||
@@ -549,6 +556,23 @@ pub(super) fn parameters_to_scalar_values(
|
||||
}
|
||||
}
|
||||
}
|
||||
&Type::JSONB => {
|
||||
let data = portal.parameter::<serde_json::Value>(idx, &client_type)?;
|
||||
match server_type {
|
||||
ConcreteDataType::Binary(_) => {
|
||||
ScalarValue::Binary(data.map(|d| jsonb::Value::from(d).to_vec()))
|
||||
}
|
||||
_ => {
|
||||
return Err(invalid_parameter_error(
|
||||
"invalid_parameter_type",
|
||||
Some(&format!(
|
||||
"Expected: {}, found: {}",
|
||||
server_type, client_type
|
||||
)),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Err(invalid_parameter_error(
|
||||
"unsupported_parameter_value",
|
||||
Some(&format!("Found type: {}", client_type)),
|
||||
@@ -581,6 +605,8 @@ pub(super) fn param_types_to_pg_types(
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::interval::IntervalUnit;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::value::ListValue;
|
||||
use pgwire::api::results::{FieldFormat, FieldInfo};
|
||||
@@ -778,6 +804,35 @@ mod test {
|
||||
),
|
||||
];
|
||||
|
||||
let datatypes = vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::boolean_datatype(),
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::time_datatype(TimeUnit::Second),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Second),
|
||||
ConcreteDataType::interval_datatype(IntervalUnit::YearMonth),
|
||||
];
|
||||
let values = vec![
|
||||
Value::Null,
|
||||
Value::Boolean(true),
|
||||
@@ -812,14 +867,15 @@ mod test {
|
||||
.build()
|
||||
.into();
|
||||
let mut builder = DataRowEncoder::new(Arc::new(schema));
|
||||
for i in values.iter() {
|
||||
encode_value(&query_context, i, &mut builder).unwrap();
|
||||
for (value, datatype) in values.iter().zip(datatypes) {
|
||||
encode_value(&query_context, value, &mut builder, &datatype).unwrap();
|
||||
}
|
||||
|
||||
let err = encode_value(
|
||||
&query_context,
|
||||
&Value::List(ListValue::new(vec![], ConcreteDataType::int16_datatype())),
|
||||
&mut builder,
|
||||
&ConcreteDataType::list_datatype(ConcreteDataType::int16_datatype()),
|
||||
)
|
||||
.unwrap_err();
|
||||
match err {
|
||||
|
||||
@@ -26,6 +26,7 @@ datatypes.workspace = true
|
||||
hex = "0.4"
|
||||
iso8601 = "0.6.1"
|
||||
itertools.workspace = true
|
||||
jsonb.workspace = true
|
||||
lazy_static.workspace = true
|
||||
regex.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -124,6 +124,16 @@ fn parse_string_to_value(
|
||||
}
|
||||
}
|
||||
ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())),
|
||||
ConcreteDataType::Json(_) => {
|
||||
if let Ok(json) = jsonb::parse_value(s.as_bytes()) {
|
||||
Ok(Value::Binary(json.to_vec().into()))
|
||||
} else {
|
||||
ParseSqlValueSnafu {
|
||||
msg: format!("Failed to parse {s} to Json value"),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
@@ -250,7 +260,19 @@ pub fn sql_value_to_value(
|
||||
SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => {
|
||||
parse_string_to_value(column_name, s.clone(), data_type, timezone)?
|
||||
}
|
||||
SqlValue::HexStringLiteral(s) => parse_hex_string(s)?,
|
||||
SqlValue::HexStringLiteral(s) => {
|
||||
// Should not directly write binary into json column
|
||||
ensure!(
|
||||
!matches!(data_type, ConcreteDataType::Json(_)),
|
||||
ColumnTypeMismatchSnafu {
|
||||
column_name,
|
||||
expect: ConcreteDataType::binary_datatype(),
|
||||
actual: ConcreteDataType::json_datatype(),
|
||||
}
|
||||
);
|
||||
|
||||
parse_hex_string(s)?
|
||||
}
|
||||
SqlValue::Placeholder(s) => return InvalidSqlValueSnafu { value: s }.fail(),
|
||||
|
||||
// TODO(dennis): supports binary string
|
||||
@@ -571,6 +593,7 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<Co
|
||||
Ok(ConcreteDataType::decimal128_datatype(*p as u8, *s as i8))
|
||||
}
|
||||
},
|
||||
SqlDataType::JSON => Ok(ConcreteDataType::json_datatype()),
|
||||
_ => error::SqlTypeNotSupportedSnafu {
|
||||
t: data_type.clone(),
|
||||
}
|
||||
@@ -607,6 +630,7 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu
|
||||
ConcreteDataType::Decimal128(d) => Ok(SqlDataType::Decimal(
|
||||
ExactNumberInfo::PrecisionAndScale(d.precision() as u64, d.scale() as u64),
|
||||
)),
|
||||
ConcreteDataType::Json(_) => Ok(SqlDataType::JSON),
|
||||
ConcreteDataType::Duration(_)
|
||||
| ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
@@ -872,6 +896,35 @@ mod tests {
|
||||
);
|
||||
assert!(v.is_err());
|
||||
assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",);
|
||||
|
||||
let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::json_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(v.is_err());
|
||||
|
||||
let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string());
|
||||
let v = sql_value_to_value(
|
||||
"a",
|
||||
&ConcreteDataType::json_datatype(),
|
||||
&sql_val,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
Value::Binary(Bytes::from(
|
||||
jsonb::parse_value(r#"{"a":"b"}"#.as_bytes())
|
||||
.unwrap()
|
||||
.to_vec()
|
||||
.as_slice()
|
||||
)),
|
||||
v
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1037,6 +1090,36 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_parse_json_to_jsonb() {
    let json_type = ConcreteDataType::json_datatype();

    // Valid JSON text must come back as a binary value holding its jsonb encoding.
    let valid_json = r#"{"a": "b"}"#;
    let parsed = parse_string_to_value("json_col", valid_json.to_string(), &json_type, None);
    if let Ok(Value::Binary(b)) = parsed {
        let expected = jsonb::parse_value(valid_json.as_bytes()).unwrap().to_vec();
        assert_eq!(b, expected);
    } else {
        unreachable!()
    }

    // Text that is not JSON must be rejected.
    let invalid_json = r#"Nicola Kovac is the best rifler in the world"#;
    let rejected = parse_string_to_value("json_col", invalid_json.to_string(), &json_type, None);
    assert!(rejected.is_err())
}
|
||||
|
||||
#[test]
|
||||
pub fn test_parse_column_default_constraint() {
|
||||
let bool_value = sqlparser::ast::Value::Boolean(true);
|
||||
|
||||
@@ -383,26 +383,42 @@ pub async fn test_postgres_crud(store_type: StorageType) {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
sqlx::query("create table demo(i bigint, ts timestamp time index, d date, dt datetime)")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"create table demo(i bigint, ts timestamp time index, d date, dt datetime, b blob, j json)",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for i in 0..10 {
|
||||
let d = NaiveDate::from_yo_opt(2015, 100).unwrap();
|
||||
let dt = d.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp_millis();
|
||||
let bytes = "hello".as_bytes();
|
||||
let json = serde_json::json!({
|
||||
"code": 200,
|
||||
"success": true,
|
||||
"payload": {
|
||||
"features": [
|
||||
"serde",
|
||||
"json"
|
||||
],
|
||||
"homepage": null
|
||||
}
|
||||
});
|
||||
|
||||
sqlx::query("insert into demo values($1, $2, $3, $4)")
|
||||
sqlx::query("insert into demo values($1, $2, $3, $4, $5, $6)")
|
||||
.bind(i)
|
||||
.bind(i)
|
||||
.bind(d)
|
||||
.bind(dt)
|
||||
.bind(bytes)
|
||||
.bind(json)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let rows = sqlx::query("select i,d,dt from demo")
|
||||
let rows = sqlx::query("select i,d,dt,b,j from demo")
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -412,6 +428,8 @@ pub async fn test_postgres_crud(store_type: StorageType) {
|
||||
let ret: i64 = row.get("i");
|
||||
let d: NaiveDate = row.get("d");
|
||||
let dt: NaiveDateTime = row.get("dt");
|
||||
let bytes: Vec<u8> = row.get("b");
|
||||
let json: serde_json::Value = row.get("j");
|
||||
|
||||
assert_eq!(ret, i as i64);
|
||||
|
||||
@@ -422,6 +440,20 @@ pub async fn test_postgres_crud(store_type: StorageType) {
|
||||
.and_then(|d| d.and_hms_opt(0, 0, 0))
|
||||
.unwrap();
|
||||
assert_eq!(expected_dt, dt);
|
||||
assert_eq!("hello".as_bytes(), bytes);
|
||||
|
||||
let expected_j = serde_json::json!({
|
||||
"code": 200,
|
||||
"success": true,
|
||||
"payload": {
|
||||
"features": [
|
||||
"serde",
|
||||
"json"
|
||||
],
|
||||
"homepage": null
|
||||
}
|
||||
});
|
||||
assert_eq!(json.to_string(), expected_j.to_string());
|
||||
}
|
||||
|
||||
let rows = sqlx::query("select i from demo where i=$1")
|
||||
|
||||
156
tests/cases/standalone/common/types/json/json.result
Normal file
156
tests/cases/standalone/common/types/json/json.result
Normal file
@@ -0,0 +1,156 @@
|
||||
CREATE TABLE jsons (j JSON, t timestamp time index);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
--Insert valid json strings--
|
||||
INSERT INTO jsons VALUES('[null]', 0),
|
||||
('[true]', 1),
|
||||
('[false]', 2),
|
||||
('[0]', 3),
|
||||
('["foo"]', 4),
|
||||
('[]', 5),
|
||||
('{}', 6),
|
||||
('[0,1]', 7),
|
||||
('{"foo":"bar"}', 8),
|
||||
('{"a":null,"foo":"bar"}', 9),
|
||||
('[-1]', 10),
|
||||
('{"entities": {
|
||||
"description": {
|
||||
"urls": [
|
||||
{
|
||||
"url": "http://t.co/QMLJeFmfMT",
|
||||
"expanded_url": "http://www.pixiv.net/member.php?id=4776",
|
||||
"display_url": "pixiv.net/member.php?id=…",
|
||||
"indices": [
|
||||
58,
|
||||
80
|
||||
]
|
||||
},
|
||||
{
|
||||
"url": "http://t.co/LU8T7vmU3h",
|
||||
"expanded_url": "http://ask.fm/KATANA77",
|
||||
"display_url": "ask.fm/KATANA77",
|
||||
"indices": [
|
||||
95,
|
||||
117
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}}', 11);
|
||||
|
||||
Affected Rows: 12
|
||||
|
||||
INSERT INTO jsons VALUES(to_json('[null]'), 12),
|
||||
(to_json('[true]'), 13),
|
||||
(to_json('[false]'), 14),
|
||||
(to_json('[0]'), 15),
|
||||
(to_json('["foo"]'), 16),
|
||||
(to_json('[]'), 17),
|
||||
(to_json('{}'), 18),
|
||||
(to_json('[0,1]'), 19),
|
||||
(to_json('{"foo":"bar"}'), 20),
|
||||
(to_json('{"a":null,"foo":"bar"}'), 21),
|
||||
(to_json('[-1]'), 22),
|
||||
(to_json('[-2147483648]'), 23),
|
||||
(to_json('{"entities": {
|
||||
"description": {
|
||||
"urls": [
|
||||
{
|
||||
"url": "http://t.co/QMLJeFmfMT",
|
||||
"expanded_url": "http://www.pixiv.net/member.php?id=4776",
|
||||
"display_url": "pixiv.net/member.php?id=…",
|
||||
"indices": [
|
||||
58,
|
||||
80
|
||||
]
|
||||
},
|
||||
{
|
||||
"url": "http://t.co/LU8T7vmU3h",
|
||||
"expanded_url": "http://ask.fm/KATANA77",
|
||||
"display_url": "ask.fm/KATANA77",
|
||||
"indices": [
|
||||
95,
|
||||
117
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}}'), 24);
|
||||
|
||||
Affected Rows: 13
|
||||
|
||||
SELECT json_to_string(j), t FROM jsons;
|
||||
|
||||
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------+
|
||||
| json_to_string(jsons.j) | t |
|
||||
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------+
|
||||
| [null] | 1970-01-01T00:00:00 |
|
||||
| [true] | 1970-01-01T00:00:00.001 |
|
||||
| [false] | 1970-01-01T00:00:00.002 |
|
||||
| [0] | 1970-01-01T00:00:00.003 |
|
||||
| ["foo"] | 1970-01-01T00:00:00.004 |
|
||||
| [] | 1970-01-01T00:00:00.005 |
|
||||
| {} | 1970-01-01T00:00:00.006 |
|
||||
| [0,1] | 1970-01-01T00:00:00.007 |
|
||||
| {"foo":"bar"} | 1970-01-01T00:00:00.008 |
|
||||
| {"a":null,"foo":"bar"} | 1970-01-01T00:00:00.009 |
|
||||
| [-1] | 1970-01-01T00:00:00.010 |
|
||||
| {"entities":{"description":{"urls":[{"display_url":"pixiv.net/member.php?id=…","expanded_url":"http://www.pixiv.net/member.php?id=4776","indices":[58,80],"url":"http://t.co/QMLJeFmfMT"},{"display_url":"ask.fm/KATANA77","expanded_url":"http://ask.fm/KATANA77","indices":[95,117],"url":"http://t.co/LU8T7vmU3h"}]}}} | 1970-01-01T00:00:00.011 |
|
||||
| [null] | 1970-01-01T00:00:00.012 |
|
||||
| [true] | 1970-01-01T00:00:00.013 |
|
||||
| [false] | 1970-01-01T00:00:00.014 |
|
||||
| [0] | 1970-01-01T00:00:00.015 |
|
||||
| ["foo"] | 1970-01-01T00:00:00.016 |
|
||||
| [] | 1970-01-01T00:00:00.017 |
|
||||
| {} | 1970-01-01T00:00:00.018 |
|
||||
| [0,1] | 1970-01-01T00:00:00.019 |
|
||||
| {"foo":"bar"} | 1970-01-01T00:00:00.020 |
|
||||
| {"a":null,"foo":"bar"} | 1970-01-01T00:00:00.021 |
|
||||
| [-1] | 1970-01-01T00:00:00.022 |
|
||||
| [-2147483648] | 1970-01-01T00:00:00.023 |
|
||||
| {"entities":{"description":{"urls":[{"display_url":"pixiv.net/member.php?id=…","expanded_url":"http://www.pixiv.net/member.php?id=4776","indices":[58,80],"url":"http://t.co/QMLJeFmfMT"},{"display_url":"ask.fm/KATANA77","expanded_url":"http://ask.fm/KATANA77","indices":[95,117],"url":"http://t.co/LU8T7vmU3h"}]}}} | 1970-01-01T00:00:00.024 |
|
||||
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------+
|
||||
|
||||
--Insert invalid json strings--
|
||||
DELETE FROM jsons;
|
||||
|
||||
Affected Rows: 25
|
||||
|
||||
INSERT INTO jsons VALUES(to_json('{"a":1, "b":2, "c":3'), 4);
|
||||
|
||||
Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: Cannot convert the string to json, have: {"a":1, "b":2, "c":3
|
||||
|
||||
INSERT INTO jsons VALUES(to_json('Morning my friends, have a nice day :)'), 5);
|
||||
|
||||
Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: Cannot convert the string to json, have: Morning my friends, have a nice day :)
|
||||
|
||||
SELECT json_to_string(j), t FROM jsons;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
CREATE TABLE json_empty (j JSON, t timestamp time index);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO json_empty VALUES(NULL, 2);
|
||||
|
||||
Affected Rows: 1
|
||||
|
||||
SELECT json_to_string(j), t FROM json_empty;
|
||||
|
||||
+------------------------------+-------------------------+
|
||||
| json_to_string(json_empty.j) | t |
|
||||
+------------------------------+-------------------------+
|
||||
| | 1970-01-01T00:00:00.002 |
|
||||
+------------------------------+-------------------------+
|
||||
|
||||
drop table jsons;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
drop table json_empty;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
96
tests/cases/standalone/common/types/json/json.sql
Normal file
96
tests/cases/standalone/common/types/json/json.sql
Normal file
@@ -0,0 +1,96 @@
|
||||
CREATE TABLE jsons (j JSON, t timestamp time index);
|
||||
|
||||
--Insert valid json strings--
|
||||
INSERT INTO jsons VALUES('[null]', 0),
|
||||
('[true]', 1),
|
||||
('[false]', 2),
|
||||
('[0]', 3),
|
||||
('["foo"]', 4),
|
||||
('[]', 5),
|
||||
('{}', 6),
|
||||
('[0,1]', 7),
|
||||
('{"foo":"bar"}', 8),
|
||||
('{"a":null,"foo":"bar"}', 9),
|
||||
('[-1]', 10),
|
||||
('{"entities": {
|
||||
"description": {
|
||||
"urls": [
|
||||
{
|
||||
"url": "http://t.co/QMLJeFmfMT",
|
||||
"expanded_url": "http://www.pixiv.net/member.php?id=4776",
|
||||
"display_url": "pixiv.net/member.php?id=…",
|
||||
"indices": [
|
||||
58,
|
||||
80
|
||||
]
|
||||
},
|
||||
{
|
||||
"url": "http://t.co/LU8T7vmU3h",
|
||||
"expanded_url": "http://ask.fm/KATANA77",
|
||||
"display_url": "ask.fm/KATANA77",
|
||||
"indices": [
|
||||
95,
|
||||
117
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}}', 11);
|
||||
|
||||
INSERT INTO jsons VALUES(to_json('[null]'), 12),
|
||||
(to_json('[true]'), 13),
|
||||
(to_json('[false]'), 14),
|
||||
(to_json('[0]'), 15),
|
||||
(to_json('["foo"]'), 16),
|
||||
(to_json('[]'), 17),
|
||||
(to_json('{}'), 18),
|
||||
(to_json('[0,1]'), 19),
|
||||
(to_json('{"foo":"bar"}'), 20),
|
||||
(to_json('{"a":null,"foo":"bar"}'), 21),
|
||||
(to_json('[-1]'), 22),
|
||||
(to_json('[-2147483648]'), 23),
|
||||
(to_json('{"entities": {
|
||||
"description": {
|
||||
"urls": [
|
||||
{
|
||||
"url": "http://t.co/QMLJeFmfMT",
|
||||
"expanded_url": "http://www.pixiv.net/member.php?id=4776",
|
||||
"display_url": "pixiv.net/member.php?id=…",
|
||||
"indices": [
|
||||
58,
|
||||
80
|
||||
]
|
||||
},
|
||||
{
|
||||
"url": "http://t.co/LU8T7vmU3h",
|
||||
"expanded_url": "http://ask.fm/KATANA77",
|
||||
"display_url": "ask.fm/KATANA77",
|
||||
"indices": [
|
||||
95,
|
||||
117
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}}'), 24);
|
||||
|
||||
SELECT json_to_string(j), t FROM jsons;
|
||||
|
||||
--Insert invalid json strings--
|
||||
DELETE FROM jsons;
|
||||
|
||||
INSERT INTO jsons VALUES(to_json('{"a":1, "b":2, "c":3'), 4);
|
||||
|
||||
INSERT INTO jsons VALUES(to_json('Morning my friends, have a nice day :)'), 5);
|
||||
|
||||
SELECT json_to_string(j), t FROM jsons;
|
||||
|
||||
CREATE TABLE json_empty (j JSON, t timestamp time index);
|
||||
|
||||
INSERT INTO json_empty VALUES(NULL, 2);
|
||||
|
||||
SELECT json_to_string(j), t FROM json_empty;
|
||||
|
||||
drop table jsons;
|
||||
|
||||
drop table json_empty;
|
||||
Reference in New Issue
Block a user