feat: implement postgres copy to stdout (#7709)

* feat: update pgwire

* feat: add special parser for copy to stdout

* feat: implement copy to stdout

* fix: improve code

* fix: expect optional with

* fix: lint

* feat: correct encoder usage and refactor

* chore: fmt

* refactor: update api

* chore: use released dependencies

* fix: update datafusion-pg-catalog to support schema query

* fix: support for double quoted identifier

* feat: update datafusion-postgres to support schema.table

* refactor: use pgsqlparser container

* refactor: remove unquote which is no longer needed

* fix: correctly handle invalid query

* fix: correctly handle null in nano timestamp

* test: add a new test for additional close )
This commit is contained in:
Ning Sun
2026-02-26 11:41:33 +08:00
committed by GitHub
parent b0fb4abbdf
commit 0f6b8ff815
8 changed files with 986 additions and 164 deletions

View File

@@ -32,22 +32,34 @@ impl ParserContext<'_> {
/// Parses a `COPY` statement, dispatching on the token that follows the
/// first (consumed) token:
///
/// * `(`        -> `COPY (<query>) TO ...`, handled by `parse_copy_query_to`
/// * `DATABASE` -> `COPY DATABASE ...`, handled by `parser_copy_database`
/// * otherwise  -> `COPY <table> ...`, handled by `parse_copy_table`
///
/// When the target location of a `COPY (<query>) TO ...` is `STDOUT`, the
/// inner query statement itself is returned instead of a `Statement::Copy`.
///
/// # Errors
///
/// Propagates any error returned by the delegated sub-parsers.
pub(crate) fn parse_copy(&mut self) -> Result<Statement> {
// Skip the leading token (presumably the `COPY` keyword -- confirm against the caller).
let _ = self.parser.next_token();
let next = self.parser.peek_token();
let copy = if next.token == Token::LParen {
if next.token == Token::LParen {
let copy_query = self.parse_copy_query_to()?;
crate::statements::copy::Copy::CopyQueryTo(copy_query)
// `COPY ... TO STDOUT` is a special case for the postgres wire protocol:
// the logic is identical to a plain query, but with an alternative data encoding on transport.
//
// So at the query engine level, we simply parse the command as its inner query;
// the encoding and its format options are dealt with in server/src/postgres/handler.rs.
if copy_query.arg.location == "STDOUT" {
Ok(*copy_query.query)
} else {
Ok(Statement::Copy(crate::statements::copy::Copy::CopyQueryTo(
copy_query,
)))
}
} else if let Word(word) = next.token
&& word.keyword == Keyword::DATABASE
{
// Consume the `DATABASE` keyword that was only peeked at above.
let _ = self.parser.next_token();
let copy_database = self.parser_copy_database()?;
crate::statements::copy::Copy::CopyDatabase(copy_database)
Ok(Statement::Copy(
crate::statements::copy::Copy::CopyDatabase(copy_database),
))
} else {
let copy_table = self.parse_copy_table()?;
crate::statements::copy::Copy::CopyTable(copy_table)
};
Ok(Statement::Copy(copy))
Ok(Statement::Copy(crate::statements::copy::Copy::CopyTable(
copy_table,
)))
}
}
fn parser_copy_database(&mut self) -> Result<CopyDatabase> {
@@ -147,21 +159,62 @@ impl ParserContext<'_> {
self.parser
.expect_keyword(Keyword::TO)
.context(error::SyntaxSnafu)?;
let (with, connection, location, limit) = self.parse_copy_parameters()?;
if limit.is_some() {
return error::InvalidSqlSnafu {
msg: "limit is not supported",
if self.parser.parse_keyword(Keyword::STDOUT) {
// early return without parsing options
// we will deal with copy to stdout on postgres protocol layer
// consume [WITH] (...) options if present (they will be ignored)
// we support both "WITH (FORMAT binary)" and "(FORMAT binary)"
// for PostgreSQL compatibility
// Check for optional WITH keyword or direct LParen (PostgreSQL syntax)
// Both "WITH (...)" and "(...)" are valid after STDOUT
let _ = self.parser.parse_keyword(Keyword::WITH);
if self.parser.peek_token().token == Token::LParen {
let _ = self.parser.next_token();
// consume all tokens until we find matching RParen
let mut depth = 1;
while depth > 0 {
match self.parser.next_token().token {
Token::LParen => depth += 1,
Token::RParen => depth -= 1,
Token::EOF => {
return error::UnexpectedTokenSnafu {
expected: ")",
actual: "EOF",
}
.fail();
}
_ => {}
}
}
}
.fail();
Ok(CopyQueryTo {
query: Box::new(query),
arg: CopyQueryToArgument {
with: OptionMap::default(),
connection: OptionMap::default(),
location: "STDOUT".to_string(),
},
})
} else {
let (with, connection, location, limit) = self.parse_copy_parameters()?;
if limit.is_some() {
return error::InvalidSqlSnafu {
msg: "limit is not supported",
}
.fail();
}
Ok(CopyQueryTo {
query: Box::new(query),
arg: CopyQueryToArgument {
with,
connection,
location,
},
})
}
Ok(CopyQueryTo {
query: Box::new(query),
arg: CopyQueryToArgument {
with,
connection,
location,
},
})
}
fn parse_copy_parameters(&mut self) -> Result<(OptionMap, OptionMap, String, Option<u64>)> {
@@ -540,4 +593,243 @@ mod tests {
)
}
}
/// `COPY (<query>) TO STDOUT WITH (FORMAT = 'csv')` must parse to exactly the
/// same statement as the bare inner query.
#[test]
fn test_copy_query_to_stdout() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM tbl WHERE ts > 10) TO STDOUT WITH (FORMAT = 'csv')";
    let inner_sql = "SELECT * FROM tbl WHERE ts > 10";

    let actual = parse(copy_sql).unwrap().pop().unwrap();
    let expected = parse(inner_sql).unwrap().remove(0);

    assert_eq!(&expected, &actual);
}
/// `COPY (<query>) TO STDOUT` with no option list must parse to the bare
/// inner query.
#[test]
fn test_copy_query_to_stdout_without_format() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT generate_series(1, 2), generate_series(2, 3)) TO STDOUT";
    let inner_sql = "SELECT generate_series(1, 2), generate_series(2, 3)";

    let actual = parse(copy_sql).unwrap().pop().unwrap();
    let expected = parse(inner_sql).unwrap().remove(0);

    assert_eq!(&expected, &actual);
}
/// `WITH (FORMAT binary)` after `STDOUT` is accepted and the statement
/// collapses to the inner query.
#[test]
fn test_copy_query_to_stdout_with_binary_format() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM test_table) TO STDOUT WITH (FORMAT binary)";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT WITH (FORMAT binary) should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// `WITH (FORMAT csv)` after `STDOUT` is accepted and the statement collapses
/// to the inner query.
#[test]
fn test_copy_query_to_stdout_with_csv_format() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM test_table) TO STDOUT WITH (FORMAT csv)";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT WITH (FORMAT csv) should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// The `FORMAT = 'binary'` spelling (with `=` and a quoted value) is accepted
/// and the statement collapses to the inner query.
#[test]
fn test_copy_query_to_stdout_with_equals_format() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM test_table) TO STDOUT WITH (FORMAT = 'binary')";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT WITH (FORMAT = 'binary') should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// A comma-separated option list after `WITH` is accepted and the statement
/// collapses to the inner query.
#[test]
fn test_copy_query_to_stdout_with_multiple_options() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql =
        "COPY (SELECT * FROM test_table) TO STDOUT WITH (FORMAT csv, DELIMITER ',', HEADER)";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT WITH multiple options should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// The option list may follow `STDOUT` directly, without the `WITH` keyword.
#[test]
fn test_copy_query_to_stdout_without_with_keyword() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM test_table) TO STDOUT (FORMAT binary)";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT (FORMAT binary) without WITH keyword should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// `(FORMAT csv)` directly after `STDOUT` (no `WITH`) is accepted and the
/// statement collapses to the inner query.
#[test]
fn test_copy_query_to_stdout_without_with_csv_format() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM test_table) TO STDOUT (FORMAT csv)";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT (FORMAT csv) without WITH keyword should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// A multi-option list directly after `STDOUT` (no `WITH`) is accepted and
/// the statement collapses to the inner query.
#[test]
fn test_copy_query_to_stdout_without_with_multiple_options() {
    // Parse with the GreptimeDB dialect and default options.
    let parse = |text: &str| {
        ParserContext::create_with_dialect(text, &GreptimeDbDialect {}, ParseOptions::default())
    };

    let copy_sql = "COPY (SELECT * FROM test_table) TO STDOUT (FORMAT csv, DELIMITER ',', HEADER)";
    let mut statements = parse(copy_sql).unwrap_or_else(|e| {
        panic!(
            "COPY TO STDOUT (FORMAT csv, ...) without WITH keyword should parse without error, got: {:?}",
            e
        )
    });
    let actual = statements.pop().unwrap();

    let expected = parse("SELECT * FROM test_table").unwrap().remove(0);
    assert_eq!(&expected, &actual);
}
/// Unbalanced parentheses in the `STDOUT` option list must be rejected.
#[test]
fn test_invalid_copy_query() {
    let malformed = [
        // Option list is never closed.
        "COPY (SELECT * FROM test_table) TO STDOUT (FORMAT csv",
        // One closing parenthesis too many.
        "COPY (SELECT * FROM test_table) TO STDOUT (FORMAT csv))",
    ];

    for sql in malformed {
        let result =
            ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default());
        assert!(result.is_err());
    }
}
}