feat(copy_to_csv): add date_format/timestamp_format/time_format. (#6995)

feat(copy_to_csv): add the `date_format`, `timestamp_format` and `time_format` options to the `COPY ... TO ... WITH` syntax

Signed-off-by: Yihai Lin <yihai-lin@foxmail.com>
This commit is contained in:
Lin Yihai
2025-09-24 14:22:53 +08:00
committed by GitHub
parent c7050831db
commit b5a8725582
14 changed files with 224 additions and 37 deletions

View File

@@ -864,6 +864,18 @@ pub enum Error {
location: Location,
},
#[snafu(display(
"{} not supported when transforming to {} format type",
format,
file_format
))]
TimestampFormatNotSupported {
file_format: String,
format: String,
#[snafu(implicit)]
location: Location,
},
#[cfg(feature = "enterprise")]
#[snafu(display("Too large duration"))]
TooLargeDuration {
@@ -1002,6 +1014,7 @@ impl ErrorExt for Error {
Error::InvalidProcessId { .. } => StatusCode::InvalidArguments,
Error::ProcessManagerMissing { .. } => StatusCode::Unexpected,
Error::PathNotFound { .. } => StatusCode::InvalidArguments,
Error::TimestampFormatNotSupported { .. } => StatusCode::InvalidArguments,
Error::SqlCommon { source, .. } => source.status_code(),
}
}

View File

@@ -640,6 +640,41 @@ fn to_copy_query_request(stmt: CopyQueryToArgument) -> Result<CopyQueryToRequest
})
}
/// Verifies that the time-related format options (`TIME_FORMAT`, `DATE_FORMAT`
/// and `TIMESTAMP_FORMAT`) found in `with` are usable.
///
/// # Errors
///
/// - `TimestampFormatNotSupported` when any of those options is present while
///   the `FORMAT_TYPE` option is missing or is not `csv` (compared ASCII
///   case-insensitively). The error names every offending option key.
/// - `InvalidCopyParameter` when a supplied format string is rejected by
///   chrono's strftime parser.
fn verify_time_related_format(with: &OptionMap) -> Result<()> {
    let file_format = with.get(common_datasource::file_format::FORMAT_TYPE);
    let time_options = [
        (
            common_datasource::file_format::TIME_FORMAT,
            with.get(common_datasource::file_format::TIME_FORMAT),
        ),
        (
            common_datasource::file_format::DATE_FORMAT,
            with.get(common_datasource::file_format::DATE_FORMAT),
        ),
        (
            common_datasource::file_format::TIMESTAMP_FORMAT,
            with.get(common_datasource::file_format::TIMESTAMP_FORMAT),
        ),
    ];

    let is_csv = matches!(file_format, Some(f) if f.eq_ignore_ascii_case("csv"));
    if !is_csv {
        // Collect the keys that were actually supplied so the error tells the
        // user which option to drop instead of reporting a placeholder.
        let offending = time_options
            .iter()
            .filter_map(|(key, value)| value.map(|_| *key))
            .collect::<Vec<_>>();
        ensure!(
            offending.is_empty(),
            error::TimestampFormatNotSupportedSnafu {
                format: offending.join(", "),
                file_format: file_format.cloned().unwrap_or_default(),
            }
        );
    }

    // Every supplied format string must parse as a strftime pattern.
    for (key, format_opt) in time_options {
        if let Some(format) = format_opt {
            chrono::format::strftime::StrftimeItems::new(format)
                .parse()
                .map_err(|_| error::InvalidCopyParameterSnafu { key, value: format }.build())?;
        }
    }

    Ok(())
}
fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<CopyTableRequest> {
let direction = match stmt {
CopyTable::To(_) => CopyDirection::Export,
@@ -664,6 +699,8 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<
let timestamp_range = timestamp_range_from_option_map(&with, &query_ctx)?;
verify_time_related_format(&with)?;
let pattern = with
.get(common_datasource::file_format::FILE_PATTERN)
.cloned();
@@ -828,7 +865,7 @@ mod tests {
use crate::statement::copy_database::{
COPY_DATABASE_TIME_END_KEY, COPY_DATABASE_TIME_START_KEY,
};
use crate::statement::timestamp_range_from_option_map;
use crate::statement::{timestamp_range_from_option_map, verify_time_related_format};
fn check_timestamp_range((start, end): (&str, &str)) -> error::Result<Option<TimestampRange>> {
let query_ctx = QueryContextBuilder::default()
@@ -864,4 +901,62 @@ mod tests {
error::Error::InvalidTimestampRange { .. }
);
}
#[test]
fn test_verify_timestamp_format() {
    // Builds an option map carrying only a timestamp format and a file format type.
    let build_options = |timestamp_format: &str, format_type: &str| {
        let entries = HashMap::from([
            (
                common_datasource::file_format::TIMESTAMP_FORMAT.to_string(),
                timestamp_format.to_string(),
            ),
            (
                common_datasource::file_format::FORMAT_TYPE.to_string(),
                format_type.to_string(),
            ),
        ]);
        OptionMap::from(entries)
    };

    // A well-formed pattern combined with the csv format type is accepted.
    let csv_with_valid_pattern = build_options("%Y-%m-%d %H:%M:%S", "csv");
    assert!(verify_time_related_format(&csv_with_valid_pattern).is_ok());

    // The same pattern is rejected when the format type is not csv.
    let json_with_valid_pattern = build_options("%Y-%m-%d %H:%M:%S", "json");
    assert_matches!(
        verify_time_related_format(&json_with_valid_pattern).unwrap_err(),
        error::Error::TimestampFormatNotSupported { .. }
    );

    // A malformed pattern is rejected even for csv.
    let csv_with_bad_pattern = build_options("%111112", "csv");
    assert_matches!(
        verify_time_related_format(&csv_with_bad_pattern).unwrap_err(),
        error::Error::InvalidCopyParameter { .. }
    );
}
}

View File

@@ -386,7 +386,7 @@ impl StatementExecutor {
}
let path = entry.path();
let file_metadata = self
.collect_metadata(&object_store, format, path.to_string())
.collect_metadata(&object_store, format.clone(), path.to_string())
.await?;
let file_schema = file_metadata.schema();

View File

@@ -66,12 +66,13 @@ impl StatementExecutor {
map_json_type_to_string_schema,
));
match format {
Format::Csv(_) => stream_to_csv(
Format::Csv(format) => stream_to_csv(
Box::pin(DfRecordBatchStreamAdapter::new(stream)),
object_store,
path,
threshold,
WRITE_CONCURRENCY,
format,
)
.await
.context(error::WriteStreamToFileSnafu { path }),
@@ -96,7 +97,10 @@ impl StatementExecutor {
.await
.context(error::WriteStreamToFileSnafu { path })
}
_ => error::UnsupportedFormatSnafu { format: *format }.fail(),
_ => error::UnsupportedFormatSnafu {
format: format.clone(),
}
.fail(),
}
}