ci: add nightly jsonbench test (#7750)

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2026-05-29 15:07:49 +08:00
committed by GitHub
parent ba15a9c056
commit 869a584f8a
7 changed files with 315 additions and 86 deletions

162
.github/workflows/nightly-jsonbench.yaml vendored Normal file
View File

@@ -0,0 +1,162 @@
# Nightly JSONBench run: allocates an ARM64 EC2 runner, builds GreptimeDB with
# the `nightly` cargo profile, runs JSONBench against it, uploads the results,
# and finally releases the runner.
name: Nightly JSONBench

on:
  schedule:
    # Trigger at 00:00 (Asia/Shanghai) on every weekday. GitHub evaluates cron
    # expressions in UTC, so 16:00 UTC on Sun-Thu (0-4) is 00:00 Mon-Fri in UTC+8.
    - cron: "0 16 * * 0-4"
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  allocate-runner:
    name: Allocate runner
    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
    runs-on: ubuntu-latest
    outputs:
      # Label used by the `jsonbench` job in its `runs-on`.
      linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
      # The following EC2 resource ids are used by `stop-linux-arm64-runner`
      # to release the runner.
      linux-arm64-ec2-runner-label: ${{ steps.start-linux-arm64-runner.outputs.label }}
      linux-arm64-ec2-runner-instance-id: ${{ steps.start-linux-arm64-runner.outputs.ec2-instance-id }}
    steps:
      # The checkout is required because `start-runner` is a repository-local action.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Allocate Linux ARM64 runner
        uses: ./.github/actions/start-runner
        id: start-linux-arm64-runner
        with:
          runner: ${{ vars.DEFAULT_ARM64_RUNNER }}
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.EC2_RUNNER_REGION }}
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          image-id: ${{ vars.EC2_RUNNER_LINUX_ARM64_IMAGE_ID }}
          security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
          subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}

  jsonbench:
    name: Run JSONBench
    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
    needs:
      - allocate-runner
    runs-on: ${{ needs.allocate-runner.outputs.linux-arm64-runner }}
    timeout-minutes: 120
    env:
      # Passed to JSONBench's `main.sh` as the data directory and output prefix.
      JSONBENCH_DATA_DIR: /home/runner/data/bluesky
      JSONBENCH_OUTPUT_PREFIX: _ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          persist-credentials: false

      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - uses: actions-rust-lang/setup-rust-toolchain@v1

      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
        with:
          shared-key: "nightly-jsonbench"
          cache-all-crates: "true"
          # Only runs on `main` write the cache.
          save-if: ${{ github.ref == 'refs/heads/main' }}

      - name: Build GreptimeDB
        run: cargo build --profile nightly --bin greptime

      # Keep only the built binary (moved under RUNNER_TEMP) and drop the
      # cargo target directory before the benchmark runs.
      - name: Reclaim disk space
        shell: bash
        run: |
          set -euo pipefail
          mkdir -p "${RUNNER_TEMP}/greptimedb-bin"
          cp ./target/nightly/greptime "${RUNNER_TEMP}/greptimedb-bin/greptime"
          chmod +x "${RUNNER_TEMP}/greptimedb-bin/greptime"
          rm -rf ./target

      - name: Run JSONBench
        shell: bash
        run: |
          set -euo pipefail
          cd "${RUNNER_TEMP}"
          cp "${RUNNER_TEMP}/greptimedb-bin/greptime" ./greptime
          chmod +x ./greptime
          export GREPTIMEDB_STANDALONE__WAL__DIR=greptimedb_data/wal
          export GREPTIMEDB_STANDALONE__STORAGE__DATA_HOME=greptimedb_data
          export GREPTIMEDB_STANDALONE__LOGGING__DIR=greptimedb_data/logs
          export GREPTIMEDB_STANDALONE__LOGGING__APPEND_STDOUT=false
          export GREPTIMEDB_STANDALONE__HTTP__BODY_LIMIT=1GB
          export GREPTIMEDB_STANDALONE__HTTP__TIMEOUT=500s
          # Start the server in the background and make sure it is killed
          # when this script exits, whatever the outcome.
          ./greptime standalone start > greptimedb.log 2>&1 &
          greptime_pid=$!
          trap 'kill "${greptime_pid}" 2>/dev/null || true' EXIT
          # Poll the health endpoint; if the server process has died, dump
          # its log and fail the step instead of waiting further.
          until curl -s --fail -o /dev/null http://localhost:4000/health; do
            if ! kill -0 "${greptime_pid}" 2>/dev/null; then
              cat greptimedb.log
              exit 1
            fi
            sleep 1
          done
          git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench
          cp ./greptime JSONBench/greptimedb/greptime
          cd JSONBench/greptimedb
          ./main.sh 3 "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false

      # Runs even when the benchmark step failed, so the logs are still collected.
      - name: Upload JSONBench results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: jsonbench-results
          path: |
            ${{ runner.temp }}/greptimedb.log
            ${{ runner.temp }}/JSONBench/greptimedb/*.log
            ${{ runner.temp }}/JSONBench/greptimedb/*.total_size
            ${{ runner.temp }}/JSONBench/greptimedb/*.data_size
            ${{ runner.temp }}/JSONBench/greptimedb/*.index_size
            ${{ runner.temp }}/JSONBench/greptimedb/*.count
            ${{ runner.temp }}/JSONBench/greptimedb/*.results_runtime
            ${{ runner.temp }}/JSONBench/greptimedb/*.query_results
          if-no-files-found: ignore
          retention-days: 7

  stop-linux-arm64-runner:
    name: Stop Linux ARM64 runner
    # It's always run as the last job in the workflow to make sure that the
    # runner is released, even when `jsonbench` fails or is cancelled. The
    # repository guard matches the other jobs: outside the main repository no
    # runner is allocated, so there is nothing to stop.
    if: ${{ always() && github.repository == 'GreptimeTeam/greptimedb' }}
    runs-on: ubuntu-latest
    needs:
      - allocate-runner
      - jsonbench
    steps:
      # The checkout is required because `stop-runner` is a repository-local action.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Stop Linux ARM64 runner
        uses: ./.github/actions/stop-runner
        with:
          label: ${{ needs.allocate-runner.outputs.linux-arm64-ec2-runner-label }}
          ec2-instance-id: ${{ needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id }}
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.EC2_RUNNER_REGION }}
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}

View File

@@ -26,12 +26,12 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value as Json};
use snafu::{OptionExt, ResultExt, ensure};
use snafu::{OptionExt, ResultExt};
use crate::error::{self, InvalidJsonSnafu, Result, SerializeSnafu};
use crate::json::value::{JsonValue, JsonVariant};
use crate::types::json_type::{JsonNativeType, JsonNumberType, JsonObjectType};
use crate::types::{StructField, StructType};
use crate::types::{JsonType, StructField, StructType};
use crate::value::{ListValue, StructValue, Value};
/// The configuration of JSON encoding
@@ -305,33 +305,47 @@ fn encode_json_array_with_context<'a>(
) -> Result<JsonValue> {
let json_array_len = json_array.len();
let mut items = Vec::with_capacity(json_array_len);
let mut element_type = item_type.cloned();
for (index, value) in json_array.into_iter().enumerate() {
let array_context = context.with_key(&index.to_string());
let item_value =
encode_json_value_with_context(value, element_type.as_ref(), &array_context)?;
let item_type = item_value.json_type().native_type().clone();
items.push(item_value.into_variant());
// Determine the common type for the list
if let Some(current_type) = &element_type {
// It's valid for json array to have different types of items, for example,
// ["a string", 1]. However, the `JsonValue` will be converted to Arrow list array,
// which requires all items have exactly same type. So we forbid the different types
// case here. Besides, it's not common for items in a json array to differ. So I think
// we are good here.
ensure!(
item_type == *current_type,
error::InvalidJsonSnafu {
value: "all items in json array must have the same type"
}
);
} else {
element_type = Some(item_type);
}
let item_value = encode_json_value_with_context(value, None, &array_context)?;
items.push(item_value);
}
// The JSON specification allows an array to hold items of different types, for example,
// ["a string", 1]. However, in this implementation the `JsonValue` will be converted to an
// Arrow list array, which requires all items to have exactly the same type. So we merge the
// possibly different item types into one unified type, and align every item value to it.
let provided_item_type = item_type.map(|x| JsonType::new_json2(x.clone()));
let merged_item_type = if let Some((first, rests)) = items.split_first() {
let mut merged = first.json_type().clone();
for rest in rests.iter().map(|x| x.json_type()) {
if matches!(merged.native_type(), JsonNativeType::Variant) {
break;
}
merged.merge(rest)?;
}
Some(merged)
} else {
None
};
let unified_item_type = match (provided_item_type, merged_item_type) {
(Some(mut x), Some(y)) => {
x.merge(&y)?;
Some(x)
}
(x, y) => x.or(y),
};
if let Some(unified_item_type) = unified_item_type {
for item in &mut items {
item.try_align(&unified_item_type)?;
}
}
let items = items
.into_iter()
.map(|x| x.into_variant())
.collect::<Vec<_>>();
Ok(JsonValue::new(JsonVariant::Array(items)))
}
@@ -1050,11 +1064,8 @@ mod tests {
fn test_encode_json_array_mixed_types() {
let json = json!([1, "hello", true, 3.15]);
let settings = JsonStructureSettings::Structured(None);
let result = settings.encode_with_type(json, None);
assert_eq!(
result.unwrap_err().to_string(),
"Invalid JSON: all items in json array must have the same type"
);
let value = settings.encode_with_type(json, None).unwrap();
assert_eq!(value.data_type().to_string(), r#"Json2["<Variant>"]"#);
}
#[test]
@@ -1276,12 +1287,12 @@ mod tests {
#[test]
fn test_encode_json_array_with_item_type() {
let json = json!([1, 2, 3]);
let item_type = Arc::new(ConcreteDataType::uint64_datatype());
let item_type = Arc::new(ConcreteDataType::int64_datatype());
let settings = JsonStructureSettings::Structured(None);
let result = settings
.encode_with_type(
json,
Some(&JsonNativeType::Array(Box::new(JsonNativeType::u64()))),
Some(&JsonNativeType::Array(Box::new(JsonNativeType::i64()))),
)
.unwrap()
.into_json_inner()
@@ -1289,9 +1300,9 @@ mod tests {
if let Value::List(list_value) = result {
assert_eq!(list_value.items().len(), 3);
assert_eq!(list_value.items()[0], Value::UInt64(1));
assert_eq!(list_value.items()[1], Value::UInt64(2));
assert_eq!(list_value.items()[2], Value::UInt64(3));
assert_eq!(list_value.items()[0], Value::Int64(1));
assert_eq!(list_value.items()[1], Value::Int64(2));
assert_eq!(list_value.items()[2], Value::Int64(3));
assert_eq!(list_value.datatype(), item_type);
} else {
panic!("Expected List value");
@@ -2249,10 +2260,10 @@ mod tests {
)])),
);
let decoded_struct = settings.decode_struct(array_struct);
let decoded_struct = settings.decode_struct(array_struct).unwrap();
assert_eq!(
decoded_struct.unwrap_err().to_string(),
"Invalid JSON: all items in json array must have the same type"
format!("{decoded_struct:?}"),
r#"StructValue { items: [List(ListValue { items: [Binary(Bytes(b"1")), Binary(Bytes(b"\"hello\"")), Binary(Bytes(b"true")), Binary(Bytes(b"3.15"))], datatype: Binary(BinaryType { repr_type: Binary }) })], fields: StructType { fields: [StructField { name: "value", data_type: List(ListType { item_type: Binary(BinaryType { repr_type: Binary }) }), nullable: true, metadata: {} }] } }"#
);
}

View File

@@ -65,6 +65,14 @@ impl JsonNumber {
JsonNumber::Float(n) => n.0,
}
}
fn native_type(&self) -> JsonNativeType {
match self {
JsonNumber::PosInt(_) => JsonNativeType::u64(),
JsonNumber::NegInt(_) => JsonNativeType::i64(),
JsonNumber::Float(_) => JsonNativeType::f64(),
}
}
}
impl From<u64> for JsonNumber {
@@ -147,26 +155,14 @@ impl JsonVariant {
match self {
JsonVariant::Null => JsonNativeType::Null,
JsonVariant::Bool(_) => JsonNativeType::Bool,
JsonVariant::Number(n) => match n {
JsonNumber::PosInt(_) => JsonNativeType::u64(),
JsonNumber::NegInt(_) => JsonNativeType::i64(),
JsonNumber::Float(_) => JsonNativeType::f64(),
},
JsonVariant::Number(n) => n.native_type(),
JsonVariant::String(_) => JsonNativeType::String,
JsonVariant::Array(array) => {
let item_type = if let Some(first) = array.first() {
first.native_type()
} else {
JsonNativeType::Null
};
JsonNativeType::Array(Box::new(item_type))
json_array_native_type(array.iter().map(JsonVariant::native_type))
}
JsonVariant::Object(object) => {
json_object_native_type(object.iter().map(|(k, v)| (k, v.native_type())))
}
JsonVariant::Object(object) => JsonNativeType::Object(
object
.iter()
.map(|(k, v)| (k.clone(), v.native_type()))
.collect(),
),
JsonVariant::Variant(_) => JsonNativeType::Variant,
}
}
@@ -469,6 +465,7 @@ impl JsonValue {
.collect::<Result<_>>()?,
),
(JsonVariant::Object(kvs), _) if kvs.is_empty() => JsonVariant::Null,
(JsonVariant::Object(mut kvs), JsonNativeType::Object(expected)) => {
ensure!(
expected.keys().len() >= kvs.keys().len()
@@ -517,7 +514,7 @@ impl JsonValue {
let x = std::mem::take(&mut self.json_variant);
self.json_variant = helper(x, expected.native_type())?;
self.json_type = OnceLock::from(expected.clone());
self.json_type = OnceLock::new();
Ok(())
}
}
@@ -623,35 +620,55 @@ pub enum JsonVariantRef<'a> {
}
impl JsonVariantRef<'_> {
fn json_type(&self) -> JsonType {
fn native_type(v: &JsonVariantRef<'_>) -> JsonNativeType {
match v {
JsonVariantRef::Null => JsonNativeType::Null,
JsonVariantRef::Bool(_) => JsonNativeType::Bool,
JsonVariantRef::Number(n) => match n {
JsonNumber::PosInt(_) => JsonNativeType::u64(),
JsonNumber::NegInt(_) => JsonNativeType::i64(),
JsonNumber::Float(_) => JsonNativeType::f64(),
},
JsonVariantRef::String(_) => JsonNativeType::String,
JsonVariantRef::Array(array) => {
let item_type = if let Some(first) = array.first() {
native_type(first)
} else {
JsonNativeType::Null
};
JsonNativeType::Array(Box::new(item_type))
}
JsonVariantRef::Object(object) => JsonNativeType::Object(
object
.iter()
.map(|(k, v)| (k.to_string(), native_type(v)))
.collect(),
),
JsonVariantRef::Variant(_) => JsonNativeType::Variant,
fn native_type(&self) -> JsonNativeType {
match self {
JsonVariantRef::Null => JsonNativeType::Null,
JsonVariantRef::Bool(_) => JsonNativeType::Bool,
JsonVariantRef::Number(n) => n.native_type(),
JsonVariantRef::String(_) => JsonNativeType::String,
JsonVariantRef::Array(array) => {
json_array_native_type(array.iter().map(JsonVariantRef::native_type))
}
JsonVariantRef::Object(object) => {
json_object_native_type(object.iter().map(|(k, v)| (*k, v.native_type())))
}
JsonVariantRef::Variant(_) => JsonNativeType::Variant,
}
JsonType::new_json2(native_type(self))
}
fn json_type(&self) -> JsonType {
JsonType::new_json2(self.native_type())
}
}
/// Computes the native type of a JSON array from the native types of its items.
///
/// An empty array is typed as an array of `Null`. Otherwise the item types are merged one
/// after another (via `JsonNativeType::merge`) into a single element type.
fn json_array_native_type<I>(items: I) -> JsonNativeType
where
    I: IntoIterator<Item = JsonNativeType>,
{
    let mut remaining = items.into_iter();
    let element_type = match remaining.next() {
        None => JsonNativeType::Null,
        Some(mut merged) => {
            for candidate in remaining {
                // Stop as soon as the merged type has become `Variant`; the remaining
                // items are not inspected (presumably nothing can change it any further).
                if matches!(merged, JsonNativeType::Variant) {
                    break;
                }
                merged.merge(&candidate);
            }
            merged
        }
    };
    JsonNativeType::Array(Box::new(element_type))
}
/// Computes the native type of a JSON object from its `(key, native type)` pairs.
///
/// An object without any field is typed as `Null`; otherwise the pairs are collected into
/// a `JsonNativeType::Object`, with every key converted into a `String`.
fn json_object_native_type<I, K>(fields: I) -> JsonNativeType
where
    I: IntoIterator<Item = (K, JsonNativeType)>,
    K: Into<String>,
{
    let mut entries = fields.into_iter().peekable();
    // Peek first so that the empty case is detected without consuming any entry.
    if entries.peek().is_none() {
        return JsonNativeType::Null;
    }
    JsonNativeType::Object(
        entries
            .map(|(name, field_type)| (name.into(), field_type))
            .collect(),
    )
}
@@ -941,7 +958,6 @@ mod tests {
("name".to_string(), JsonVariant::Null),
])))
);
assert_eq!(value.json_type(), &expected);
// Object alignment should fail if the expected type misses any field from the value.
let expected = JsonType::new_json2(JsonNativeType::Object(JsonObjectType::from([(

View File

@@ -115,6 +115,14 @@ impl JsonNativeType {
(JsonNativeType::Null, that) => that.clone(),
(this, JsonNativeType::Null) => this,
(this, that) if this == *that => this,
(JsonNativeType::Number(x), JsonNativeType::Number(y)) => {
JsonNativeType::Number(match (x, y) {
(x, y) if x == *y => x,
(JsonNumberType::F64, _) | (_, JsonNumberType::F64) => JsonNumberType::F64,
_ => JsonNumberType::I64,
})
}
_ => JsonNativeType::Variant,
};
}
@@ -822,7 +830,7 @@ mod tests {
test(
"1.5",
&mut JsonType::new_json2(JsonNativeType::i64()),
Ok(r#""<Variant>""#),
Ok(r#""<Number>""#),
)?;
// Object merge should preserve existing fields and append missing fields.

View File

@@ -89,7 +89,9 @@ impl MutableVector for JsonVectorBuilder {
.fail();
};
let json_type = value.json_type();
self.merged_type.merge(json_type)?;
if !self.merged_type.is_include(json_type) {
self.merged_type.merge(json_type)?;
}
let value = JsonValue::new(JsonVariant::from(value.variant().clone()));
self.values.push(value);

View File

@@ -126,7 +126,7 @@ select j.a, j.a.x from json2_table order by ts;
| {"b":-2} | |
| {"b":3} | |
| {"b":-4} | |
| {"b":null} | |
| | |
| | |
| {"b":"s7"} | |
| {"b":8} | |
@@ -151,6 +151,14 @@ select j.c, j.y from json2_table order by ts;
| | false |
+-----------------------------------+-----------------------------------+
select j from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Failed to align JSON array, reason: Invalid argument error: use StructArray::try_new_with_length or StructArray::new_empty_fields to create a struct array with no fields so that the length can be set correctly
select * from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Failed to align JSON array, reason: Invalid argument error: use StructArray::try_new_with_length or StructArray::new_empty_fields to create a struct array with no fields so that the length can be set correctly
select j.a.b + 1 from json2_table order by ts;
+------------------------------------------------------------+
@@ -168,6 +176,19 @@ select j.a.b + 1 from json2_table order by ts;
| 11 |
+------------------------------------------------------------+
select abs(j.a.b) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
-- "j.c" is of type "String", "abs" is expected to be all "null"s.
select abs(j.c) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
select j.d from json2_table order by ts;
+-----------------------------------+

View File

@@ -46,8 +46,17 @@ select j.a, j.a.x from json2_table order by ts;
select j.c, j.y from json2_table order by ts;
select j from json2_table order by ts;
select * from json2_table order by ts;
select j.a.b + 1 from json2_table order by ts;
select abs(j.a.b) from json2_table order by ts;
-- "j.c" is of type "String", "abs" is expected to be all "null"s.
select abs(j.c) from json2_table order by ts;
select j.d from json2_table order by ts;
drop table json2_table;