fix: add map datatype conversion in copy_table_from (#6185) (#6422)

Signed-off-by: Arshdeep54 <balarsh535@gmail.com>
This commit is contained in:
Arshdeep
2025-07-28 09:23:10 +05:30
committed by GitHub
parent 6ded2d267a
commit 2e571e351f
9 changed files with 392 additions and 11 deletions

View File

@@ -90,6 +90,7 @@ datafusion-expr.workspace = true
hex.workspace = true
http.workspace = true
itertools.workspace = true
jsonb.workspace = true
opentelemetry-proto.workspace = true
partition.workspace = true
paste.workspace = true

View File

@@ -2199,3 +2199,117 @@ WITH(
}
}
}
#[apply(both_instances_cases)]
async fn test_copy_parquet_map_to_json(instance: Arc<dyn MockInstance>) {
let instance = instance.frontend();
let output = execute_sql(
&instance,
r#"CREATE TABLE map_json_test (
"id" INT,
map_data JSON,
ts TIMESTAMP TIME INDEX
);"#,
)
.await
.data;
assert!(matches!(output, OutputData::AffectedRows(0)));
let parquet_path = find_testing_resource("/tests/data/parquet/map_to_json.parquet");
let output = execute_sql(
&instance,
&format!(
"COPY map_json_test FROM '{}' WITH (FORMAT='parquet');",
parquet_path
),
)
.await
.data;
assert!(matches!(output, OutputData::AffectedRows(5)));
let output = execute_sql(
&instance,
"SELECT \"id\", json_to_string(map_data), map_data FROM map_json_test ORDER BY \"id\";",
)
.await
.data;
let expected_jsons = [
r#"{"a":"1","b":"2","c":"hello"}"#,
r#"{"x":"42","y":"test"}"#,
r#"{}"#,
r#"{"single":"value"}"#,
r#"{"complex":"structure","nested":"data"}"#,
];
let binary: Vec<String> = expected_jsons
.iter()
.map(|json_str| {
let jsonb_value = jsonb::parse_value(json_str.as_bytes()).unwrap();
hex::encode(jsonb_value.to_vec())
})
.collect();
let expected = format!(
r#"+----+-----------------------------------------+----------------------------------------------------------------------------------------------+
| id | json_to_string(map_json_test.map_data) | map_data |
+----+-----------------------------------------+----------------------------------------------------------------------------------------------+
| 1 | {{"a":"1","b":"2","c":"hello"}} | {:<92} |
| 2 | {{"x":"42","y":"test"}} | {:<92} |
| 3 | {{}} | {:<92} |
| 4 | {{"single":"value"}} | {:<92} |
| 5 | {{"complex":"structure","nested":"data"}} | {:<92} |
+----+-----------------------------------------+----------------------------------------------------------------------------------------------+"#,
binary[0], binary[1], binary[2], binary[3], binary[4],
);
check_output_stream(output, &expected).await;
}
#[apply(both_instances_cases)]
async fn test_copy_parquet_map_to_binary(instance: Arc<dyn MockInstance>) {
let instance = instance.frontend();
let output = execute_sql(
&instance,
r#"CREATE TABLE map_bin_test (
"id" INT,
map_data BINARY,
ts TIMESTAMP TIME INDEX
);"#,
)
.await
.data;
assert!(matches!(output, OutputData::AffectedRows(0)));
let parquet_path = find_testing_resource("/tests/data/parquet/map_to_json.parquet");
let output = execute_sql(
&instance,
&format!(
"COPY map_bin_test FROM '{}' WITH (FORMAT='parquet');",
parquet_path
),
)
.await
.data;
assert!(matches!(output, OutputData::AffectedRows(5)));
let output = execute_sql(
&instance,
"SELECT \"id\", CAST(map_data AS STRING) FROM map_bin_test ORDER BY \"id\";",
)
.await
.data;
let expected = r#"+----+-----------------------------------------+
| id | map_bin_test.map_data |
+----+-----------------------------------------+
| 1 | {"a":"1","b":"2","c":"hello"} |
| 2 | {"x":"42","y":"test"} |
| 3 | {} |
| 4 | {"single":"value"} |
| 5 | {"complex":"structure","nested":"data"} |
+----+-----------------------------------------+"#;
check_output_stream(output, expected).await;
}