refactor: upgrade DataFusion, Arrow and Sqlparser (#1074)

* refactor: upgrade DataFusion, Arrow and Sqlparser

* fix: resolve PR comments
Author: LFC
Date: 2023-02-27 22:20:08 +08:00
Committed by: GitHub
Parent: 30287e7e41
Commit: 11d45e2918
115 changed files with 1368 additions and 1000 deletions

Cargo.lock (generated)

@@ -190,11 +190,12 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fe17dc0113da7e2eaeaedbd304d347aa8ea64916d225b79a5c3f3b6b5d8da4c"
checksum = "f3724c874f1517cf898cd1c3ad18ab5071edf893c48e73139ab1e16cf0f2affe"
dependencies = [
"ahash 0.8.3",
"arrow-arith",
"arrow-array",
"arrow-buffer",
"arrow-cast",
@@ -203,23 +204,33 @@ dependencies = [
"arrow-ipc",
"arrow-json",
"arrow-ord",
"arrow-row",
"arrow-schema",
"arrow-select",
"arrow-string",
"chrono",
"comfy-table",
]
[[package]]
name = "arrow-arith"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e958823b8383ca14d0a2e973de478dd7674cd9f72837f8c41c132a0fda6a4e5e"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
"half 2.2.1",
"hashbrown 0.13.2",
"multiversion",
"num",
"regex",
]
[[package]]
name = "arrow-array"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9452131e027aec3276e43449162af084db611c42ef875e54d231e6580bc6254"
checksum = "db670eab50e76654065b5aed930f4367101fcddcb2223802007d1e0b4d5a2579"
dependencies = [
"ahash 0.8.3",
"arrow-buffer",
@@ -233,9 +244,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a301001e8ed7da638a12fa579ac5f3f154c44c0655f2ca6ed0f8586b418a779"
checksum = "9f0e01c931882448c0407bd32311a624b9f099739e94e786af68adc97016b5f2"
dependencies = [
"half 2.2.1",
"num",
@@ -243,9 +254,9 @@ dependencies = [
[[package]]
name = "arrow-cast"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "048c91d067f2eb8cc327f086773e5b0f0d7714780807fc4db09366584e23bac8"
checksum = "4bf35d78836c93f80d9362f3ccb47ff5e2c5ecfc270ff42cdf1ef80334961d44"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -259,9 +270,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed914cd0006a3bb9cac8136b3098ac7796ad26b82362f00d4f2e7c1a54684b86"
checksum = "0a6aa7c2531d89d01fed8c469a9b1bf97132a0bdf70b4724fe4bbb4537a50880"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -270,6 +281,7 @@ dependencies = [
"arrow-schema",
"chrono",
"csv",
"csv-core",
"lazy_static",
"lexical-core",
"regex",
@@ -277,9 +289,9 @@ dependencies = [
[[package]]
name = "arrow-data"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e59619d9d102e4e6b22087b2bd60c07df76fcb68683620841718f6bc8e8f02cb"
checksum = "ea50db4d1e1e4c2da2bfdea7b6d2722eef64267d5ab680d815f7ae42428057f5"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -289,15 +301,16 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb6e49945f93a8fbd3ec0568167f42097b56134b88686602b9e639a7042ef38"
checksum = "6ad4c883d509d89f05b2891ad889729f17ab2191b5fd22b0cf3660a28cc40af5"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-ipc",
"arrow-schema",
"base64 0.13.1",
"base64 0.21.0",
"bytes",
"futures",
"proc-macro2",
@@ -311,9 +324,9 @@ dependencies = [
[[package]]
name = "arrow-ipc"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb7ad6d2fa06a1cebdaa213c59fc953b9230e560d8374aba133b572b864ec55e"
checksum = "a4042fe6585155d1ec28a8e4937ec901a3ca7a19a22b9f6cd3f551b935cd84f5"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -325,9 +338,9 @@ dependencies = [
[[package]]
name = "arrow-json"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e22efab3ad70336057660c5e5f2b72e2417e3444c27cb42dc477d678ddd6979"
checksum = "7c907c4ab4f26970a3719dc06e78e8054a01d0c96da3664d23b941e201b33d2b"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -337,15 +350,16 @@ dependencies = [
"chrono",
"half 2.2.1",
"indexmap",
"lexical-core",
"num",
"serde_json",
]
[[package]]
name = "arrow-ord"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e23b623332804a65ad11e7732c351896dcb132c19f8e25d99fdb13b00aae5206"
checksum = "e131b447242a32129efc7932f58ed8931b42f35d8701c1a08f9f524da13b1d3c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -356,19 +370,34 @@ dependencies = [
]
[[package]]
name = "arrow-schema"
version = "29.0.0"
name = "arrow-row"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ef17c144f1253b9864f5a3e8f4c6f1e436bdd52394855d5942f132f776b64e"
checksum = "b591ef70d76f4ac28dd7666093295fece0e5f9298f49af51ea49c001e1635bb6"
dependencies = [
"ahash 0.8.3",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"half 2.2.1",
"hashbrown 0.13.2",
]
[[package]]
name = "arrow-schema"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb327717d87eb94be5eff3b0cb8987f54059d343ee5235abf7f143c85f54cfc8"
dependencies = [
"serde",
]
[[package]]
name = "arrow-select"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2accaf218ff107e3df0ee8f1e09b092249a1cc741c4377858a1470fd27d7096"
checksum = "79d3c389d1cea86793934f31594f914c8547d82e91e3411d4833ad0aac3266a7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -379,9 +408,9 @@ dependencies = [
[[package]]
name = "arrow-string"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a0954f9e1f45b04815ddacbde72899bf3c03a08fa6c0375f42178c4a01a510"
checksum = "30ee67790496dd310ddbf5096870324431e89aa76453e010020ac29b1184d356"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -478,6 +507,17 @@ dependencies = [
"futures-lite",
]
[[package]]
name = "async-recursion"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b015a331cc64ebd1774ba119538573603427eaace0a1950c423ab971f903796"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "async-stream"
version = "0.3.3"
@@ -1072,6 +1112,7 @@ dependencies = [
"regex",
"serde",
"serde_json",
"session",
"snafu",
"storage",
"table",
@@ -2033,8 +2074,8 @@ dependencies = [
[[package]]
name = "datafusion"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2054,6 +2095,7 @@ dependencies = [
"futures",
"glob",
"hashbrown 0.13.2",
"indexmap",
"itertools",
"lazy_static",
"log",
@@ -2078,11 +2120,12 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"arrow",
"chrono",
"num_cpus",
"object_store",
"parquet",
"sqlparser",
@@ -2090,8 +2133,8 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2102,8 +2145,8 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"arrow",
"async-trait",
@@ -2112,13 +2155,15 @@ dependencies = [
"datafusion-expr",
"datafusion-physical-expr",
"hashbrown 0.13.2",
"itertools",
"log",
"regex-syntax",
]
[[package]]
name = "datafusion-physical-expr"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2132,6 +2177,7 @@ dependencies = [
"datafusion-row",
"half 2.2.1",
"hashbrown 0.13.2",
"indexmap",
"itertools",
"lazy_static",
"md-5",
@@ -2146,8 +2192,8 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"arrow",
"datafusion-common",
@@ -2157,8 +2203,8 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "15.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=4917235a398ae20145c87d20984e6367dc1a0c1e#4917235a398ae20145c87d20984e6367dc1a0c1e"
version = "19.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=fad360df0132a2fcb264a7c07b2b02f0b1dfc644#fad360df0132a2fcb264a7c07b2b02f0b1dfc644"
dependencies = [
"arrow-schema",
"datafusion-common",
@@ -2633,12 +2679,12 @@ checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499"
[[package]]
name = "flatbuffers"
version = "22.9.29"
version = "23.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce016b9901aef3579617931fbb2df8fc9a9f7cb95a16eb8acc8148209bb9e70"
checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619"
dependencies = [
"bitflags",
"thiserror",
"rustc_version 0.4.0",
]
[[package]]
@@ -4113,26 +4159,6 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "multiversion"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373"
dependencies = [
"multiversion-macros",
]
[[package]]
name = "multiversion-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "mysql_async"
version = "0.31.2"
@@ -4489,9 +4515,9 @@ dependencies = [
[[package]]
name = "object_store"
version = "0.5.3"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4201837dc4c27a8670f0363b1255cd3845a4f0c521211cced1ed14c1d0cc6d2"
checksum = "1f344e51ec9584d2f51199c0c29c6f73dddd04ade986497875bf8fa2f178caf0"
dependencies = [
"async-trait",
"bytes",
@@ -4741,9 +4767,9 @@ dependencies = [
[[package]]
name = "parquet"
version = "29.0.0"
version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d906343fd18ace6b998d5074697743e8e9358efa8c3c796a1381b98cba813338"
checksum = "b1b076829801167d889795cd1957989055543430fa1469cb1f6e32b789bfc764"
dependencies = [
"ahash 0.8.3",
"arrow-array",
@@ -4753,7 +4779,7 @@ dependencies = [
"arrow-ipc",
"arrow-schema",
"arrow-select",
"base64 0.13.1",
"base64 0.21.0",
"brotli",
"bytes",
"chrono",
@@ -5279,9 +5305,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
[[package]]
name = "proc-macro2"
version = "1.0.47"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725"
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
dependencies = [
"unicode-ident",
]
@@ -5317,6 +5343,7 @@ dependencies = [
name = "promql"
version = "0.1.0"
dependencies = [
"async-recursion",
"async-trait",
"bytemuck",
"catalog",
@@ -5358,9 +5385,9 @@ dependencies = [
[[package]]
name = "prost-build"
version = "0.11.3"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e330bf1316db56b12c2bcfa399e8edddd4821965ea25ddb2c134b610b1c1c604"
checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e"
dependencies = [
"bytes",
"heck 0.4.0",
@@ -5490,6 +5517,7 @@ version = "0.1.0"
dependencies = [
"approx_eq",
"arc-swap",
"arrow-schema",
"async-trait",
"catalog",
"chrono",
@@ -7086,6 +7114,7 @@ dependencies = [
"common-catalog",
"common-error",
"common-time",
"datafusion-sql",
"datatypes",
"hex",
"itertools",
@@ -7129,11 +7158,23 @@ dependencies = [
[[package]]
name = "sqlparser"
version = "0.28.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "249ae674b9f636b8ff64d8bfe218774cf05a26de40fd9f358669dccc4c0a9d7d"
checksum = "db67dc6ef36edb658196c3fef0464a80b53dbbc194a904e81f9bd4190f9ecc5b"
dependencies = [
"log",
"sqlparser_derive",
]
[[package]]
name = "sqlparser_derive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
@@ -7360,6 +7401,8 @@ dependencies = [
name = "substrait"
version = "0.1.0"
dependencies = [
"async-recursion",
"async-trait",
"bytes",
"catalog",
"common-catalog",
@@ -7370,6 +7413,7 @@ dependencies = [
"datatypes",
"futures",
"prost",
"session",
"snafu",
"substrait 0.4.0",
"table",


@@ -48,29 +48,28 @@ edition = "2021"
license = "Apache-2.0"
[workspace.dependencies]
arrow = "29.0"
arrow-array = "29.0"
arrow-flight = "29.0"
arrow-schema = { version = "29.0", features = ["serde"] }
arrow = "33.0"
arrow-array = "33.0"
arrow-flight = "33.0"
arrow-schema = { version = "33.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
# TODO(LFC): Use released Datafusion when it officially dependent on Arrow 29.0
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
futures = "0.3"
futures-util = "0.3"
parquet = "29.0"
parquet = "33.0"
paste = "1.0"
prost = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = "0.28"
sqlparser = "0.30"
tokio = { version = "1.24.2", features = ["full"] }
tokio-util = "0.7"
tonic = { version = "0.8", features = ["tls"] }


@@ -208,6 +208,7 @@ fn build_values(column: &ArrayRef) -> Values {
| DataType::Dictionary(_, _)
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
| DataType::RunEndEncoded(_, _)
| DataType::Map(_, _) => todo!(),
}
}
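
Arrow 33 adds new `DataType` variants such as `RunEndEncoded`, so exhaustive matches like `build_values` above need an extra arm even when the variant is not supported. A minimal sketch of the pattern, using the `arrow` facade crate and a hypothetical `type_label` helper:

```rust
use arrow::datatypes::DataType;

// Hypothetical helper: crates that match on `DataType` exhaustively (like
// `build_values` above) must grow an arm for variants added in Arrow 33,
// even if they are not supported yet.
fn type_label(data_type: &DataType) -> &'static str {
    match data_type {
        DataType::Int64 => "int64",
        DataType::Utf8 => "string",
        // New in Arrow 33; not handled by the value builder yet.
        DataType::RunEndEncoded(_, _) => "run-end encoded (unsupported)",
        _ => "other",
    }
}

fn main() {
    assert_eq!(type_label(&DataType::Int64), "int64");
}
```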


@@ -27,6 +27,7 @@ meta-client = { path = "../meta-client" }
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
session = { path = "../session" }
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
table = { path = "../table" }


@@ -201,6 +201,9 @@ pub enum Error {
#[snafu(backtrace)]
source: common_catalog::error::Error,
},
#[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
QueryAccessDenied { catalog: String, schema: String },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -246,6 +249,7 @@ impl ErrorExt for Error {
}
Error::Unimplemented { .. } => StatusCode::Unsupported,
Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
}
}
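
The new `QueryAccessDenied` variant is raised through snafu's generated `QueryAccessDeniedSnafu` context selector when a query reaches outside the session's current catalog or schema (see the new `DfTableSourceProvider` later in this diff). A minimal sketch of that pattern, with a hypothetical simplified error enum and check function:

```rust
use snafu::{ensure, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
    QueryAccessDenied { catalog: String, schema: String },
}

// Hypothetical guard mirroring the cross-schema check: reject any schema
// other than the session's current one.
fn check_schema(current_schema: &str, requested: &str, catalog: &str) -> Result<(), Error> {
    ensure!(
        requested == current_schema,
        QueryAccessDeniedSnafu {
            catalog,
            schema: requested,
        }
    );
    Ok(())
}

fn main() {
    assert!(check_schema("public", "public", "greptime").is_ok());
    assert!(check_schema("public", "other_schema", "greptime").is_err());
}
```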


@@ -34,6 +34,7 @@ pub mod local;
pub mod remote;
pub mod schema;
pub mod system;
pub mod table_source;
pub mod tables;
/// Represent a list of named catalogs
@@ -107,7 +108,12 @@ pub trait CatalogManager: CatalogList {
fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>>;
/// Returns the table by catalog, schema and table name.
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>>;
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
@@ -186,7 +192,8 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = if let Some(table) = manager.table(catalog_name, schema_name, table_name)? {
let table = manager.table(catalog_name, schema_name, table_name).await?;
let table = if let Some(table) = table {
table
} else {
let table = engine
@@ -219,7 +226,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
}
/// The number of regions in the datanode node.
pub fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
pub async fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
let mut region_number: u64 = 0;
for catalog_name in catalog_manager.catalog_names()? {
@@ -239,11 +246,13 @@ pub fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
})?;
for table_name in schema.table_names()? {
let table = schema
.table(&table_name)?
.context(error::TableNotFoundSnafu {
table_info: &table_name,
})?;
let table =
schema
.table(&table_name)
.await?
.context(error::TableNotFoundSnafu {
table_info: &table_name,
})?;
let region_numbers = &table.table_info().meta.region_numbers;
region_number += region_numbers.len() as u64;


@@ -345,7 +345,7 @@ impl CatalogManager for LocalCatalogManager {
{
let _lock = self.register_lock.lock().await;
if let Some(existing) = schema.table(&request.table_name)? {
if let Some(existing) = schema.table(&request.table_name).await? {
if existing.table_info().ident.table_id != request.table_id {
error!(
"Unexpected table register request: {:?}, existing: {:?}",
@@ -434,9 +434,10 @@ impl CatalogManager for LocalCatalogManager {
} = &request;
let table_id = self
.catalogs
.table(catalog, schema, table_name)?
.table(catalog, schema, table_name)
.await?
.with_context(|| error::TableNotExistSnafu {
table: format!("{catalog}.{schema}.{table_name}"),
table: format_full_table_name(catalog, schema, table_name),
})?
.table_info()
.ident
@@ -505,7 +506,7 @@ impl CatalogManager for LocalCatalogManager {
.schema(schema)
}
fn table(
async fn table(
&self,
catalog_name: &str,
schema_name: &str,
@@ -521,7 +522,7 @@ impl CatalogManager for LocalCatalogManager {
catalog: catalog_name,
schema: schema_name,
})?;
schema.table(table_name)
schema.table(table_name).await
}
}


@@ -18,6 +18,7 @@ use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_telemetry::error;
use snafu::{ensure, OptionExt};
@@ -155,16 +156,20 @@ impl CatalogManager for MemoryCatalogManager {
}
}
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>> {
let c = self.catalogs.read().unwrap();
let catalog = if let Some(c) = c.get(catalog) {
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let catalog = {
let c = self.catalogs.read().unwrap();
let Some(c) = c.get(catalog) else { return Ok(None) };
c.clone()
} else {
return Ok(None);
};
match catalog.schema(schema)? {
None => Ok(None),
Some(s) => s.table(table_name),
Some(s) => s.table(table_name).await,
}
}
}
@@ -283,6 +288,7 @@ impl Default for MemorySchemaProvider {
}
}
#[async_trait]
impl SchemaProvider for MemorySchemaProvider {
fn as_any(&self) -> &dyn Any {
self
@@ -293,7 +299,7 @@ impl SchemaProvider for MemorySchemaProvider {
Ok(tables.keys().cloned().collect())
}
fn table(&self, name: &str) -> Result<Option<TableRef>> {
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
let tables = self.tables.read().unwrap();
Ok(tables.get(name).cloned())
}
@@ -355,8 +361,8 @@ mod tests {
use super::*;
#[test]
fn test_new_memory_catalog_list() {
#[tokio::test]
async fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_list().unwrap();
let default_catalog = catalog_list.catalog(DEFAULT_CATALOG_NAME).unwrap().unwrap();
@@ -369,9 +375,9 @@ mod tests {
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
let table = default_schema.table("numbers").unwrap();
let table = default_schema.table("numbers").await.unwrap();
assert!(table.is_some());
assert!(default_schema.table("not_exists").unwrap().is_none());
assert!(default_schema.table("not_exists").await.unwrap().is_none());
}
#[tokio::test]
@@ -419,7 +425,7 @@ mod tests {
// test new table name exists
assert!(provider.table_exist(new_table_name).unwrap());
let registered_table = provider.table(new_table_name).unwrap().unwrap();
let registered_table = provider.table(new_table_name).await.unwrap().unwrap();
assert_eq!(
registered_table.table_info().ident.table_id,
test_table.table_info().ident.table_id
@@ -468,6 +474,7 @@ mod tests {
let registered_table = catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);


@@ -19,6 +19,7 @@ use std::sync::Arc;
use arc_swap::ArcSwap;
use async_stream::stream;
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_telemetry::{debug, info};
use futures::Stream;
@@ -468,7 +469,7 @@ impl CatalogManager for RemoteCatalogManager {
.schema(schema)
}
fn table(
async fn table(
&self,
catalog_name: &str,
schema_name: &str,
@@ -483,7 +484,7 @@ impl CatalogManager for RemoteCatalogManager {
catalog: catalog_name,
schema: schema_name,
})?;
schema.table(table_name)
schema.table(table_name).await
}
}
@@ -692,6 +693,7 @@ impl RemoteSchemaProvider {
}
}
#[async_trait]
impl SchemaProvider for RemoteSchemaProvider {
fn as_any(&self) -> &dyn Any {
self
@@ -701,7 +703,7 @@ impl SchemaProvider for RemoteSchemaProvider {
Ok(self.tables.load().keys().cloned().collect::<Vec<_>>())
}
fn table(&self, name: &str) -> Result<Option<TableRef>> {
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
Ok(self.tables.load().get(name).cloned())
}


@@ -15,11 +15,13 @@
use std::any::Any;
use std::sync::Arc;
use async_trait::async_trait;
use table::TableRef;
use crate::error::Result;
/// Represents a schema, comprising a number of named tables.
#[async_trait]
pub trait SchemaProvider: Sync + Send {
/// Returns the schema provider as [`Any`](std::any::Any)
/// so that it can be downcast to a specific implementation.
@@ -29,7 +31,7 @@ pub trait SchemaProvider: Sync + Send {
fn table_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific table from the schema by name, provided it exists.
fn table(&self, name: &str) -> Result<Option<TableRef>>;
async fn table(&self, name: &str) -> Result<Option<TableRef>>;
/// If supported by the implementation, adds a new table to this schema.
/// If a table of the same name existed before, it returns "Table already exists" error.
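
Because the upgraded DataFusion resolves tables asynchronously, `SchemaProvider::table` and the `CatalogManager` lookups become `async` via the `async-trait` crate, and every call site gains an `.await`. A minimal sketch of the pattern, with hypothetical simplified `TableRef` and `Result` aliases standing in for the crate's real types:

```rust
use std::sync::Arc;

use async_trait::async_trait;

// Hypothetical stand-ins for the crate's `TableRef` and `Result` types.
type TableRef = Arc<str>;
type Result<T> = std::result::Result<T, String>;

// Trimmed-down view of the now-async trait.
#[async_trait]
trait SchemaProvider: Send + Sync {
    async fn table(&self, name: &str) -> Result<Option<TableRef>>;
}

struct SingleTableSchema {
    name: String,
    table: TableRef,
}

#[async_trait]
impl SchemaProvider for SingleTableSchema {
    async fn table(&self, name: &str) -> Result<Option<TableRef>> {
        // Callers now await the lookup: `schema.table("numbers").await?`.
        Ok((self.name == name).then(|| self.table.clone()))
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let schema = SingleTableSchema {
        name: "numbers".to_string(),
        table: Arc::from("numbers table"),
    };
    assert!(schema.table("numbers").await?.is_some());
    assert!(schema.table("missing").await?.is_none());
    Ok(())
}
```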


@@ -0,0 +1,178 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::format_full_table_name;
use datafusion::common::{OwnedTableReference, ResolvedTableReference, TableReference};
use datafusion::datasource::provider_as_source;
use datafusion::logical_expr::TableSource;
use session::context::QueryContext;
use snafu::{ensure, OptionExt};
use table::table::adapter::DfTableProviderAdapter;
use crate::error::{
CatalogNotFoundSnafu, QueryAccessDeniedSnafu, Result, SchemaNotFoundSnafu, TableNotExistSnafu,
};
use crate::CatalogListRef;
pub struct DfTableSourceProvider {
catalog_list: CatalogListRef,
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
disallow_cross_schema_query: bool,
default_catalog: String,
default_schema: String,
}
impl DfTableSourceProvider {
pub fn new(
catalog_list: CatalogListRef,
disallow_cross_schema_query: bool,
query_ctx: &QueryContext,
) -> Self {
Self {
catalog_list,
disallow_cross_schema_query,
resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog(),
default_schema: query_ctx.current_schema(),
}
}
pub fn resolve_table_ref<'a>(
&'a self,
table_ref: TableReference<'a>,
) -> Result<ResolvedTableReference<'a>> {
if self.disallow_cross_schema_query {
match &table_ref {
TableReference::Bare { .. } => (),
TableReference::Partial { schema, .. } => {
ensure!(
schema.as_ref() == self.default_schema,
QueryAccessDeniedSnafu {
catalog: &self.default_catalog,
schema: schema.as_ref(),
}
);
}
TableReference::Full {
catalog, schema, ..
} => {
ensure!(
catalog.as_ref() == self.default_catalog
&& schema.as_ref() == self.default_schema,
QueryAccessDeniedSnafu {
catalog: catalog.as_ref(),
schema: schema.as_ref()
}
);
}
};
}
Ok(table_ref.resolve(&self.default_catalog, &self.default_schema))
}
pub async fn resolve_table(
&mut self,
table_ref: OwnedTableReference,
) -> Result<Arc<dyn TableSource>> {
let table_ref = table_ref.as_table_reference();
let table_ref = self.resolve_table_ref(table_ref)?;
let resolved_name = table_ref.to_string();
if let Some(table) = self.resolved_tables.get(&resolved_name) {
return Ok(table.clone());
}
let catalog_name = table_ref.catalog.as_ref();
let schema_name = table_ref.schema.as_ref();
let table_name = table_ref.table.as_ref();
let catalog = self
.catalog_list
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog.schema(schema_name)?.context(SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?;
let table = schema
.table(table_name)
.await?
.with_context(|| TableNotExistSnafu {
table: format_full_table_name(catalog_name, schema_name, table_name),
})?;
let table = DfTableProviderAdapter::new(table);
let table = provider_as_source(Arc::new(table));
self.resolved_tables.insert(resolved_name, table.clone());
Ok(table)
}
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use session::context::QueryContext;
use super::*;
use crate::local::MemoryCatalogManager;
#[test]
fn test_validate_table_ref() {
let query_ctx = &QueryContext::with("greptime", "public");
let table_provider =
DfTableSourceProvider::new(Arc::new(MemoryCatalogManager::default()), true, query_ctx);
let table_ref = TableReference::Bare {
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("wrong_schema"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_err());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("wrong_catalog"),
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_err());
}
}
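
For reference, the `TableReference::resolve` call used by `resolve_table_ref` fills in the session's default catalog and schema for bare and partial references. A tiny sketch of that defaulting behavior, assuming DataFusion 19's `Cow`-based variants shown in the test above:

```rust
use std::borrow::Cow;

use datafusion::common::TableReference;

fn main() {
    // A bare name like `metrics` picks up the defaults, here "greptime"."public".
    let bare = TableReference::Bare {
        table: Cow::Borrowed("metrics"),
    };
    let resolved = bare.resolve("greptime", "public");
    assert_eq!(resolved.catalog.as_ref(), "greptime");
    assert_eq!(resolved.schema.as_ref(), "public");
    assert_eq!(resolved.table.as_ref(), "metrics");
}
```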


@@ -20,6 +20,7 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use async_stream::stream;
use async_trait::async_trait;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use common_error::ext::BoxedError;
use common_query::logical_plan::Expr;
@@ -200,6 +201,7 @@ pub struct InformationSchema {
pub system: Arc<SystemCatalogTable>,
}
#[async_trait]
impl SchemaProvider for InformationSchema {
fn as_any(&self) -> &dyn Any {
self
@@ -212,7 +214,7 @@ impl SchemaProvider for InformationSchema {
])
}
fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
if name.eq_ignore_ascii_case("tables") {
Ok(Some(self.tables.clone()))
} else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {


@@ -71,6 +71,7 @@ mod tests {
let registered_table = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);
@@ -158,6 +159,7 @@ mod tests {
let table = guard.as_ref().unwrap();
let table_registered = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
.await
.unwrap()
.unwrap();
assert_eq!(


@@ -118,7 +118,7 @@ impl Database {
request: Some(request),
};
let request = Ticket {
ticket: request.encode_to_vec(),
ticket: request.encode_to_vec().into(),
};
let mut client = self.client.make_client()?;


@@ -20,6 +20,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
pub struct Bytes(bytes::Bytes);
impl From<Bytes> for bytes::Bytes {
fn from(value: Bytes) -> Self {
value.0
}
}
impl From<bytes::Bytes> for Bytes {
fn from(bytes: bytes::Bytes) -> Bytes {
Bytes(bytes)
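
arrow-flight 33 and the regenerated protobuf types carry payload fields such as `Ticket::ticket` as `bytes::Bytes` instead of `Vec<u8>`, which is why `encode_to_vec()` gains an `.into()` above and why the extra `From<Bytes> for bytes::Bytes` impl is useful. A tiny sketch of the conversion, independent of the GreptimeDB `Bytes` wrapper:

```rust
fn main() {
    // `bytes::Bytes` implements `From<Vec<u8>>`, so protobuf-encoded payloads
    // (e.g. from prost's `encode_to_vec`) convert with a plain `.into()`.
    let payload: Vec<u8> = vec![1, 2, 3];
    let ticket_bytes: bytes::Bytes = payload.into();
    assert_eq!(ticket_bytes.as_ref(), &[1u8, 2, 3]);
}
```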


@@ -16,7 +16,7 @@ common-runtime = { path = "../runtime" }
dashmap = "5.4"
datafusion.workspace = true
datatypes = { path = "../../datatypes" }
flatbuffers = "22"
flatbuffers = "23.1"
futures = "0.3"
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }


@@ -16,8 +16,9 @@ use std::collections::HashMap;
use std::sync::Arc;
use api::v1::{AffectedRows, FlightMetadata};
use arrow_flight::utils::{flight_data_from_arrow_batch, flight_data_to_arrow_batch};
use arrow_flight::utils::flight_data_to_arrow_batch;
use arrow_flight::{FlightData, IpcMessage, SchemaAsIpc};
use common_base::bytes::Bytes;
use common_recordbatch::{RecordBatch, RecordBatches};
use datatypes::arrow;
use datatypes::arrow::datatypes::Schema as ArrowSchema;
@@ -39,38 +40,58 @@ pub enum FlightMessage {
AffectedRows(usize),
}
#[derive(Default)]
pub struct FlightEncoder {
write_options: writer::IpcWriteOptions,
data_gen: writer::IpcDataGenerator,
dictionary_tracker: writer::DictionaryTracker,
}
impl Default for FlightEncoder {
fn default() -> Self {
Self {
write_options: writer::IpcWriteOptions::default(),
data_gen: writer::IpcDataGenerator::default(),
dictionary_tracker: writer::DictionaryTracker::new(false),
}
}
}
impl FlightEncoder {
pub fn encode(&self, flight_message: FlightMessage) -> FlightData {
pub fn encode(&mut self, flight_message: FlightMessage) -> FlightData {
match flight_message {
FlightMessage::Schema(schema) => {
SchemaAsIpc::new(schema.arrow_schema(), &self.write_options).into()
}
FlightMessage::Recordbatch(recordbatch) => {
let (flight_dictionaries, flight_batch) = flight_data_from_arrow_batch(
recordbatch.df_record_batch(),
&self.write_options,
);
let (encoded_dictionaries, encoded_batch) = self
.data_gen
.encoded_batch(
recordbatch.df_record_batch(),
&mut self.dictionary_tracker,
&self.write_options,
)
.expect("DictionaryTracker configured above to not fail on replacement");
// TODO(LFC): Handle dictionary as FlightData here, when we support Arrow's Dictionary DataType.
// Currently we don't have a datatype corresponding to Arrow's Dictionary DataType,
// so there won't be any "dictionaries" here. Assert to be sure about it, and
// perform a "testing guard" in case we forget to handle the possible "dictionaries"
// here in the future.
debug_assert_eq!(flight_dictionaries.len(), 0);
debug_assert_eq!(encoded_dictionaries.len(), 0);
flight_batch
encoded_batch.into()
}
FlightMessage::AffectedRows(rows) => {
let metadata = FlightMetadata {
affected_rows: Some(AffectedRows { value: rows as _ }),
}
.encode_to_vec();
FlightData::new(None, IpcMessage(build_none_flight_msg()), metadata, vec![])
FlightData::new(
None,
IpcMessage(build_none_flight_msg().into()),
metadata,
vec![],
)
}
}
}
@@ -83,7 +104,8 @@ pub struct FlightDecoder {
impl FlightDecoder {
pub fn try_decode(&mut self, flight_data: FlightData) -> Result<FlightMessage> {
let message = root_as_message(flight_data.data_header.as_slice()).map_err(|e| {
let bytes = flight_data.data_header.slice(..);
let message = root_as_message(&bytes).map_err(|e| {
InvalidFlightDataSnafu {
reason: e.to_string(),
}
@@ -91,7 +113,7 @@ impl FlightDecoder {
})?;
match message.header_type() {
MessageHeader::NONE => {
let metadata = FlightMetadata::decode(flight_data.app_metadata.as_slice())
let metadata = FlightMetadata::decode(flight_data.app_metadata)
.context(DecodeFlightDataSnafu)?;
if let Some(AffectedRows { value }) = metadata.affected_rows {
return Ok(FlightMessage::AffectedRows(value as _));
@@ -176,7 +198,7 @@ pub fn flight_messages_to_recordbatches(messages: Vec<FlightMessage>) -> Result<
}
}
fn build_none_flight_msg() -> Vec<u8> {
fn build_none_flight_msg() -> Bytes {
let mut builder = FlatBufferBuilder::new();
let mut message = arrow::ipc::MessageBuilder::new(&mut builder);
@@ -187,7 +209,7 @@ fn build_none_flight_msg() -> Vec<u8> {
let data = message.finish();
builder.finish(data, None);
builder.finished_data().to_vec()
builder.finished_data().into()
}
#[cfg(test)]
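
Since `flight_data_from_arrow_batch` was removed from arrow-flight 33, the new `FlightEncoder` goes through the IPC data generator instead. A standalone sketch of that encoding path; the direct `arrow`/`arrow_flight` crate paths are assumptions, as the project normally goes through its `datatypes::arrow` re-exports:

```rust
use std::sync::Arc;

use arrow::array::Int64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
use arrow::record_batch::RecordBatch;
use arrow_flight::FlightData;

// Encode one batch the way the new `FlightEncoder` does: generate the
// IPC-encoded payload and convert it into a `FlightData` message.
fn encode_batch(batch: &RecordBatch) -> FlightData {
    let write_options = IpcWriteOptions::default();
    let data_gen = IpcDataGenerator::default();
    let mut dictionary_tracker = DictionaryTracker::new(false);
    let (_dictionaries, encoded_batch) = data_gen
        .encoded_batch(batch, &mut dictionary_tracker, &write_options)
        .expect("in-memory IPC encoding should not fail");
    encoded_batch.into()
}

fn main() {
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int64, false)]));
    let batch =
        RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![1, 2, 3]))]).unwrap();
    let flight_data = encode_batch(&batch);
    assert!(!flight_data.data_header.is_empty());
}
```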


@@ -239,7 +239,6 @@ impl From<BoxedError> for Error {
#[cfg(test)]
mod tests {
use datatypes::arrow::error::ArrowError;
use snafu::GenerateImplicitData;
use super::*;
@@ -286,7 +285,7 @@ mod tests {
fn test_convert_df_recordbatch_stream_error() {
let result: std::result::Result<i32, common_recordbatch::error::Error> =
Err(common_recordbatch::error::Error::PollStream {
source: ArrowError::DivideByZero,
source: DataFusionError::Internal("blabla".to_string()),
backtrace: Backtrace::generate(),
});
let error = result


@@ -315,7 +315,11 @@ mod test {
.unwrap()
.build()
.unwrap();
let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
let physical_plan = ctx
.state()
.create_physical_plan(&logical_plan)
.await
.unwrap();
let df_recordbatches = collect(physical_plan, Arc::new(TaskContext::from(&ctx)))
.await
.unwrap();


@@ -18,9 +18,9 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::DataFusionError;
use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
use datatypes::schema::{Schema, SchemaRef};
use futures::ready;
use snafu::ResultExt;
@@ -57,14 +57,14 @@ impl DfRecordBatchStream for DfRecordBatchStreamAdapter {
}
impl Stream for DfRecordBatchStreamAdapter {
type Item = ArrowResult<DfRecordBatch>;
type Item = DfResult<DfRecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(recordbatch)) => match recordbatch {
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.into_df_record_batch()))),
Err(e) => Poll::Ready(Some(Err(ArrowError::ExternalError(Box::new(e))))),
Err(e) => Poll::Ready(Some(Err(DataFusionError::External(Box::new(e))))),
},
Poll::Ready(None) => Poll::Ready(None),
}
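
Record batch streams now yield `Result<_, DataFusionError>` instead of `ArrowError`, so foreign errors are boxed into `DataFusionError::External`, as the adapter above does. A minimal sketch of the wrapping, with a hypothetical `io::Error` as the source:

```rust
use std::io;

use datafusion_common::DataFusionError;

// Any `Error + Send + Sync` source can be carried as an external error.
fn to_df_error(e: io::Error) -> DataFusionError {
    DataFusionError::External(Box::new(e))
}

fn main() {
    let err = to_df_error(io::Error::new(io::ErrorKind::Other, "stream failed"));
    println!("{err}");
}
```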


@@ -55,7 +55,7 @@ pub enum Error {
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datatypes::arrow::error::ArrowError,
source: datafusion::error::DataFusionError,
backtrace: Backtrace,
},


@@ -5,6 +5,8 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-recursion = "1.0"
async-trait.workspace = true
bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
@@ -15,6 +17,7 @@ datafusion-expr.workspace = true
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost.workspace = true
session = { path = "../../session" }
snafu.workspace = true
table = { path = "../../table" }


@@ -635,8 +635,6 @@ mod utils {
Operator::Modulo => "modulo",
Operator::And => "and",
Operator::Or => "or",
Operator::Like => "like",
Operator::NotLike => "not_like",
Operator::IsDistinctFrom => "is_distinct_from",
Operator::IsNotDistinctFrom => "is_not_distinct_from",
Operator::RegexMatch => "regex_match",
@@ -649,8 +647,6 @@ mod utils {
Operator::BitwiseShiftRight => "bitwise_shift_right",
Operator::BitwiseShiftLeft => "bitwise_shift_left",
Operator::StringConcat => "string_concat",
Operator::ILike => "i_like",
Operator::NotILike => "not_i_like",
}
}


@@ -14,16 +14,20 @@
use std::sync::Arc;
use async_recursion::async_recursion;
use async_trait::async_trait;
use bytes::{Buf, Bytes, BytesMut};
use catalog::table_source::DfTableSourceProvider;
use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use common_catalog::format_full_table_name;
use common_telemetry::debug;
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datafusion::common::{DFField, DFSchema};
use datafusion::common::{DFField, DFSchema, OwnedTableReference};
use datafusion::datasource::DefaultTableSource;
use datafusion::physical_plan::project_schema;
use datafusion_expr::{Filter, LogicalPlan, TableScan, TableSource};
use datafusion_expr::{Filter, LogicalPlan, TableScan};
use prost::Message;
use session::context::QueryContext;
use snafu::{ensure, OptionExt, ResultExt};
use substrait_proto::proto::expression::mask_expression::{StructItem, StructSelect};
use substrait_proto::proto::expression::MaskExpression;
@@ -37,8 +41,8 @@ use table::table::adapter::DfTableProviderAdapter;
use crate::context::ConvertorContext;
use crate::df_expr::{expression_from_df_expr, to_df_expr};
use crate::error::{
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error,
InvalidParametersSnafu, MissingFieldSnafu, ResolveTableSnafu, SchemaNotMatchSnafu,
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
};
use crate::schema::{from_schema, to_schema};
@@ -46,18 +50,19 @@ use crate::SubstraitPlan;
pub struct DFLogicalSubstraitConvertor;
#[async_trait]
impl SubstraitPlan for DFLogicalSubstraitConvertor {
type Error = Error;
type Plan = LogicalPlan;
fn decode<B: Buf + Send>(
async fn decode<B: Buf + Send>(
&self,
message: B,
catalog_manager: CatalogManagerRef,
) -> Result<Self::Plan, Self::Error> {
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
self.convert_plan(plan, catalog_manager)
self.convert_plan(plan, catalog_manager).await
}
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
@@ -71,7 +76,7 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
}
impl DFLogicalSubstraitConvertor {
fn convert_plan(
async fn convert_plan(
&self,
mut plan: Plan,
catalog_manager: CatalogManagerRef,
@@ -102,20 +107,25 @@ impl DFLogicalSubstraitConvertor {
.fail()?
};
self.rel_to_logical_plan(&mut ctx, Box::new(rel), catalog_manager)
// TODO(LFC): Create table provider from outside, respect "disallow_cross_schema_query" option in query engine state.
let mut table_provider =
DfTableSourceProvider::new(catalog_manager, false, &QueryContext::new());
self.rel_to_logical_plan(&mut ctx, Box::new(rel), &mut table_provider)
.await
}
fn rel_to_logical_plan(
#[async_recursion]
async fn rel_to_logical_plan(
&self,
ctx: &mut ConvertorContext,
rel: Box<Rel>,
catalog_manager: CatalogManagerRef,
table_provider: &mut DfTableSourceProvider,
) -> Result<LogicalPlan, Error> {
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
// build logical plan
let logical_plan = match rel_type {
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, catalog_manager)?,
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, table_provider).await?,
RelType::Filter(filter) => {
let FilterRel {
common: _,
@@ -128,7 +138,7 @@ impl DFLogicalSubstraitConvertor {
field: "input",
plan: "Filter",
})?;
let input = Arc::new(self.rel_to_logical_plan(ctx, input, catalog_manager)?);
let input = Arc::new(self.rel_to_logical_plan(ctx, input, table_provider).await?);
let condition = condition.context(MissingFieldSnafu {
field: "condition",
@@ -191,11 +201,11 @@ impl DFLogicalSubstraitConvertor {
Ok(logical_plan)
}
fn convert_read_rel(
async fn convert_read_rel(
&self,
ctx: &mut ConvertorContext,
read_rel: Box<ReadRel>,
catalog_manager: CatalogManagerRef,
table_provider: &mut DfTableSourceProvider,
) -> Result<LogicalPlan, Error> {
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
let read_type = read_rel.read_type.context(MissingFieldSnafu {
@@ -230,17 +240,17 @@ impl DFLogicalSubstraitConvertor {
.projection
.map(|mask_expr| self.convert_mask_expression(mask_expr));
// Get table handle from catalog manager
let table_ref = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.map_err(BoxedError::new)
.context(InternalSnafu)?
.context(TableNotFoundSnafu {
name: format!("{catalog_name}.{schema_name}.{table_name}"),
let table_ref = OwnedTableReference::Full {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table: table_name.clone(),
};
let adapter = table_provider
.resolve_table(table_ref)
.await
.with_context(|_| ResolveTableSnafu {
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
})?;
let adapter = Arc::new(DefaultTableSource::new(Arc::new(
DfTableProviderAdapter::new(table_ref),
)));
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
let stored_schema = adapter.schema();
@@ -262,7 +272,7 @@ impl DFLogicalSubstraitConvertor {
};
// Calculate the projected schema
let qualified = &format!("{catalog_name}.{schema_name}.{table_name}");
let qualified = &format_full_table_name(&catalog_name, &schema_name, &table_name);
let projected_schema = Arc::new(
project_schema(&stored_schema, projection.as_ref())
.and_then(|x| {
@@ -281,7 +291,7 @@ impl DFLogicalSubstraitConvertor {
// TODO(ruihang): Support limit(fetch)
Ok(LogicalPlan::TableScan(TableScan {
table_name: format!("{catalog_name}.{schema_name}.{table_name}"),
table_name: qualified.to_string(),
source: adapter,
projection,
projected_schema,
@@ -314,7 +324,7 @@ impl DFLogicalSubstraitConvertor {
.fail()?,
LogicalPlan::Filter(filter) => {
let input = Some(Box::new(
self.logical_plan_to_rel(ctx, filter.input().clone())?,
self.logical_plan_to_rel(ctx, filter.input.clone())?,
));
let schema = plan
@@ -324,7 +334,7 @@ impl DFLogicalSubstraitConvertor {
.context(error::ConvertDfSchemaSnafu)?;
let condition = Some(Box::new(expression_from_df_expr(
ctx,
filter.predicate(),
&filter.predicate,
&schema,
)?));
@@ -396,7 +406,10 @@ impl DFLogicalSubstraitConvertor {
| LogicalPlan::Explain(_)
| LogicalPlan::Analyze(_)
| LogicalPlan::Extension(_)
| LogicalPlan::Prepare(_) => InvalidParametersSnafu {
| LogicalPlan::Prepare(_)
| LogicalPlan::Dml(_)
| LogicalPlan::DescribeTable(_)
| LogicalPlan::Unnest(_) => InvalidParametersSnafu {
reason: format!(
"Trying to convert DDL/DML plan to substrait proto, plan: {plan:?}",
),
@@ -524,6 +537,7 @@ mod test {
use catalog::{CatalogList, CatalogProvider, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datafusion::common::{DFSchema, ToDFSchema};
use datafusion_expr::TableSource;
use datatypes::schema::RawSchema;
use table::requests::CreateTableRequest;
use table::test_util::{EmptyTable, MockTableEngine};
@@ -572,7 +586,7 @@ mod test {
let convertor = DFLogicalSubstraitConvertor;
let proto = convertor.encode(plan.clone()).unwrap();
let tripped_plan = convertor.decode(proto, catalog).unwrap();
let tripped_plan = convertor.decode(proto, catalog).await.unwrap();
assert_eq!(format!("{plan:?}"), format!("{tripped_plan:?}"));
}
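
`rel_to_logical_plan` now awaits table resolution while calling itself on nested relations, so it is marked `#[async_recursion]`, which boxes the recursive future. A minimal sketch of that pattern with a hypothetical `depth` function:

```rust
use async_recursion::async_recursion;

// A recursive `async fn` has an infinitely sized future unless it is boxed;
// the attribute rewrites the return type into a boxed future.
#[async_recursion]
async fn depth(n: u64) -> u64 {
    if n == 0 {
        0
    } else {
        1 + depth(n - 1).await
    }
}

#[tokio::main]
async fn main() {
    assert_eq!(depth(3).await, 3);
}
```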


@@ -105,6 +105,13 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Unable to resolve table: {table_name}, error: {source}"))]
ResolveTable {
table_name: String,
#[snafu(backtrace)]
source: catalog::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -127,6 +134,7 @@ impl ErrorExt for Error {
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
Error::ConvertDfSchema { source } => source.status_code(),
Error::ResolveTable { source, .. } => source.status_code(),
}
}


@@ -13,6 +13,7 @@
// limitations under the License.
#![feature(let_chains)]
#![feature(trait_upcasting)]
mod context;
mod df_expr;
@@ -21,17 +22,19 @@ pub mod error;
mod schema;
mod types;
use async_trait::async_trait;
use bytes::{Buf, Bytes};
use catalog::CatalogManagerRef;
pub use crate::df_logical::DFLogicalSubstraitConvertor;
#[async_trait]
pub trait SubstraitPlan {
type Error: std::error::Error;
type Plan;
fn decode<B: Buf + Send>(
async fn decode<B: Buf + Send>(
&self,
message: B,
catalog_manager: CatalogManagerRef,


@@ -28,6 +28,7 @@ common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"


@@ -379,7 +379,7 @@ pub enum Error {
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datatypes::arrow::error::ArrowError,
source: datafusion_common::DataFusionError,
backtrace: Backtrace,
},


@@ -106,7 +106,7 @@ impl HeartbeatTask {
let mut tx = Self::create_streams(&meta_client, running.clone()).await?;
common_runtime::spawn_bg(async move {
while running.load(Ordering::Acquire) {
let region_num = match region_number(&catalog_manager_clone) {
let region_num = match region_number(&catalog_manager_clone).await {
Ok(region_num) => region_num as i64,
Err(e) => {
error!("failed to get region number, err: {e:?}");


@@ -45,6 +45,7 @@ impl Instance {
pub(crate) async fn execute_logical(&self, plan_bytes: Vec<u8>) -> Result<Output> {
let logical_plan = DFLogicalSubstraitConvertor
.decode(plan_bytes.as_slice(), self.catalog_manager.clone())
.await
.context(DecodeLogicalPlanSnafu)?;
self.query_engine
@@ -74,6 +75,7 @@ impl Instance {
let table = self
.catalog_manager
.table(catalog, schema, table_name)
.await
.context(error::CatalogSnafu)?
.context(error::TableNotFoundSnafu { table_name })?;
@@ -287,9 +289,9 @@ mod test {
+---------------------+-------+-----+
| ts | host | cpu |
+---------------------+-------+-----+
| 2022-12-30T07:09:00 | host1 | 1 |
| 2022-12-30T07:09:00 | host1 | 1.0 |
| 2022-12-30T07:09:01 | host2 | |
| 2022-12-30T07:09:02 | host3 | 3 |
| 2022-12-30T07:09:02 | host3 | 3.0 |
+---------------------+-------+-----+";
assert_eq!(recordbatches.pretty_print().unwrap(), expected);
}
@@ -325,7 +327,7 @@ mod test {
+---------------------+-------+------+--------+
| ts | host | cpu | memory |
+---------------------+-------+------+--------+
| 2022-12-28T04:17:05 | host1 | 66.6 | 1024 |
| 2022-12-28T04:17:05 | host1 | 66.6 | 1024.0 |
| 2022-12-28T04:17:06 | host2 | 88.8 | 333.3 |
+---------------------+-------+------+--------+";
let actual = recordbatch.pretty_print().unwrap();
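
The expected test outputs change because the newer Arrow pretty printer renders floating-point columns with an explicit fractional part (`1024.0` rather than `1024`). A small sketch that reproduces the formatting, assuming the `arrow` facade crate with its default `prettyprint` feature:

```rust
use std::sync::Arc;

use arrow::array::Float64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;

fn main() {
    let schema = Arc::new(Schema::new(vec![Field::new("memory", DataType::Float64, true)]));
    let batch =
        RecordBatch::try_new(schema, vec![Arc::new(Float64Array::from(vec![1024.0, 333.3]))])
            .unwrap();
    // Prints "1024.0" and "333.3" in the memory column.
    println!("{}", pretty_format_batches(&[batch]).unwrap());
}
```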


@@ -51,6 +51,7 @@ impl Instance {
let logical_plan = self
.query_engine
.statement_to_plan(stmt, query_ctx)
.await
.context(ExecuteSqlSnafu)?;
self.query_engine
@@ -216,6 +217,7 @@ impl Instance {
let logical_plan = self
.query_engine
.statement_to_plan(stmt, query_ctx)
.await
.context(ExecuteSqlSnafu)?;
self.query_engine
@@ -335,10 +337,15 @@ impl SqlQueryHandler for Instance {
.await
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
if let Statement::Query(_) = stmt {
self.query_engine
.describe(QueryStatement::Sql(stmt), query_ctx)
.await
.map(Some)
.context(error::DescribeStatementSnafu)
} else {


@@ -105,6 +105,7 @@ impl SqlHandler {
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(error::CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: req.name().to_string(),
@@ -244,7 +245,7 @@ mod tests {
.unwrap(),
);
catalog_list.start().await.unwrap();
catalog_list
assert!(catalog_list
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
@@ -253,7 +254,7 @@ mod tests {
table: Arc::new(DemoTable),
})
.await
.unwrap();
.unwrap());
let factory = QueryEngineFactory::new(catalog_list.clone());
let query_engine = factory.query_engine();


@@ -15,6 +15,7 @@ use std::collections::HashMap;
use std::pin::Pin;
use catalog::CatalogManagerRef;
use common_catalog::format_full_table_name;
use common_query::Output;
use common_recordbatch::RecordBatch;
use datafusion_expr::type_coercion::binary::coerce_types;
@@ -239,6 +240,7 @@ impl SqlHandler {
QueryStatement::Sql(Statement::Query(Box::new(query))),
query_ctx.clone(),
)
.await
.context(ExecuteSqlSnafu)?;
let output = self
@@ -284,9 +286,10 @@ impl SqlHandler {
let table = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: table_name.clone(),
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
})?;
if stmt.is_insert_select() {


@@ -236,7 +236,7 @@ async fn test_execute_insert_by_select() {
+-------+------+--------+---------------------+
| host | cpu | memory | ts |
+-------+------+--------+---------------------+
| host1 | 66.6 | 1024 | 2022-06-15T07:02:37 |
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 |
+-------+------+--------+---------------------+"
.to_string();
@@ -457,8 +457,8 @@ async fn test_rename_table() {
+-------+-----+--------+---------------------+
| host | cpu | memory | ts |
+-------+-----+--------+---------------------+
| host1 | 1.1 | 100 | 1970-01-01T00:00:01 |
| host2 | 2.2 | 200 | 1970-01-01T00:00:02 |
| host1 | 1.1 | 100.0 | 1970-01-01T00:00:01 |
| host2 | 2.2 | 200.0 | 1970-01-01T00:00:02 |
+-------+-----+--------+---------------------+\
"
.to_string();
@@ -559,9 +559,9 @@ async fn test_alter_table() {
+-------+-----+--------+---------------------+--------+
| host | cpu | memory | ts | my_tag |
+-------+-----+--------+---------------------+--------+
| host1 | 1.1 | 100 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 200 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 300 | 1970-01-01T00:00:03 | |
| host1 | 1.1 | 100.0 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 200.0 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 300.0 | 1970-01-01T00:00:03 | |
+-------+-----+--------+---------------------+--------+\
"
.to_string();
@@ -594,14 +594,14 @@ async fn test_alter_table() {
let output = execute_sql(&instance, "select * from demo order by ts").await;
let expected = "\
+-------+-----+---------------------+--------+
| host | cpu | ts | my_tag |
+-------+-----+---------------------+--------+
| host1 | 1.1 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 1970-01-01T00:00:03 | |
| host4 | 400 | 1970-01-01T00:00:04 | world |
+-------+-----+---------------------+--------+\
+-------+-------+---------------------+--------+
| host | cpu | ts | my_tag |
+-------+-------+---------------------+--------+
| host1 | 1.1 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 1970-01-01T00:00:03 | |
| host4 | 400.0 | 1970-01-01T00:00:04 | world |
+-------+-------+---------------------+--------+\
"
.to_string();
check_output_stream(output, expected).await;
@@ -757,8 +757,8 @@ async fn test_delete() {
+-------+---------------------+------+--------+
| host | ts | cpu | memory |
+-------+---------------------+------+--------+
| host2 | 2022-06-15T07:02:38 | 77.7 | 2048 |
| host3 | 2022-06-15T07:02:39 | 88.8 | 3072 |
| host2 | 2022-06-15T07:02:38 | 77.7 | 2048.0 |
| host3 | 2022-06-15T07:02:39 | 88.8 | 3072.0 |
+-------+---------------------+------+--------+\
"
.to_string();


@@ -106,17 +106,17 @@ async fn sql_insert_tql_query_ceil() {
"+---------------------+-----------+--------------+-------+\
\n| ts | ceil(cpu) | ceil(memory) | host |\
\n+---------------------+-----------+--------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:20 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:30 | 32 | 8192 | host1 |\
\n| 1970-01-01T00:00:40 | 96 | 334 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:00 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:10 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:30 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n| 1970-01-01T00:00:00 | 67.0 | 1024.0 | host1 |\
\n| 1970-01-01T00:00:10 | 100.0 | 20480.0 | host1 |\
\n| 1970-01-01T00:00:20 | 100.0 | 20480.0 | host1 |\
\n| 1970-01-01T00:00:30 | 32.0 | 8192.0 | host1 |\
\n| 1970-01-01T00:00:40 | 96.0 | 334.0 | host1 |\
\n| 1970-01-01T00:00:50 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:00 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:10 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:20 | 0.0 | 2334.0 | host1 |\
\n| 1970-01-01T00:01:30 | 0.0 | 2334.0 | host1 |\
\n| 1970-01-01T00:01:40 | 49.0 | 3334.0 | host1 |\
\n+---------------------+-----------+--------------+-------+",
)
.await;
@@ -154,12 +154,12 @@ async fn sql_insert_promql_query_ceil() {
"+---------------------+-----------+--------------+-------+\
\n| ts | ceil(cpu) | ceil(memory) | host |\
\n+---------------------+-----------+--------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:05 | 67 | 4096 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n| 1970-01-01T00:00:00 | 67.0 | 1024.0 | host1 |\
\n| 1970-01-01T00:00:05 | 67.0 | 4096.0 | host1 |\
\n| 1970-01-01T00:00:10 | 100.0 | 20480.0 | host1 |\
\n| 1970-01-01T00:00:50 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:20 | 0.0 | 2334.0 | host1 |\
\n| 1970-01-01T00:01:40 | 49.0 | 3334.0 | host1 |\
\n+---------------------+-----------+--------------+-------+",
)
.await;
@@ -214,8 +214,8 @@ async fn aggregators_simple_sum() {
"+------------+---------------------+--------------------------+\
\n| group | ts | SUM(http_requests.value) |\
\n+------------+---------------------+--------------------------+\
\n| production | 1970-01-01T00:00:00 | 300 |\
\n| canary | 1970-01-01T00:00:00 | 700 |\
\n| production | 1970-01-01T00:00:00 | 300.0 |\
\n| canary | 1970-01-01T00:00:00 | 700.0 |\
\n+------------+---------------------+--------------------------+",
)
.await;
@@ -238,8 +238,8 @@ async fn aggregators_simple_avg() {
"+------------+---------------------+--------------------------+\
\n| group | ts | AVG(http_requests.value) |\
\n+------------+---------------------+--------------------------+\
\n| production | 1970-01-01T00:00:00 | 150 |\
\n| canary | 1970-01-01T00:00:00 | 350 |\
\n| production | 1970-01-01T00:00:00 | 150.0 |\
\n| canary | 1970-01-01T00:00:00 | 350.0 |\
\n+------------+---------------------+--------------------------+",
)
.await;
@@ -286,8 +286,8 @@ async fn aggregators_simple_without() {
"+------------+------------+---------------------+--------------------------+\
\n| group | job | ts | SUM(http_requests.value) |\
\n+------------+------------+---------------------+--------------------------+\
\n| production | api-server | 1970-01-01T00:00:00 | 300 |\
\n| canary | api-server | 1970-01-01T00:00:00 | 700 |\
\n| production | api-server | 1970-01-01T00:00:00 | 300.0 |\
\n| canary | api-server | 1970-01-01T00:00:00 | 700.0 |\
\n+------------+------------+---------------------+--------------------------+",
)
.await;
@@ -309,7 +309,7 @@ async fn aggregators_empty_by() {
"+---------------------+--------------------------+\
\n| ts | SUM(http_requests.value) |\
\n+---------------------+--------------------------+\
\n| 1970-01-01T00:00:00 | 1000 |\
\n| 1970-01-01T00:00:00 | 1000.0 |\
\n+---------------------+--------------------------+",
)
.await;
@@ -331,7 +331,7 @@ async fn aggregators_no_by_without() {
"+---------------------+--------------------------+\
\n| ts | SUM(http_requests.value) |\
\n+---------------------+--------------------------+\
\n| 1970-01-01T00:00:00 | 1000 |\
\n| 1970-01-01T00:00:00 | 1000.0 |\
\n+---------------------+--------------------------+",
)
.await;
@@ -354,8 +354,8 @@ async fn aggregators_empty_without() {
"+------------+----------+------------+---------------------+--------------------------+\
\n| group | instance | job | ts | SUM(http_requests.value) |\
\n+------------+----------+------------+---------------------+--------------------------+\
\n| production | 0 | api-server | 1970-01-01T00:00:00 | 100 |\
\n| production | 1 | api-server | 1970-01-01T00:00:00 | 200 |\
\n| production | 0 | api-server | 1970-01-01T00:00:00 | 100.0 |\
\n| production | 1 | api-server | 1970-01-01T00:00:00 | 200.0 |\
\n+------------+----------+------------+---------------------+--------------------------+",
)
.await;
@@ -378,8 +378,8 @@ async fn aggregators_complex_combined_aggrs() {
"+------------+-----------------------------------------------------------------------------------------------------------+\
\n| job | SUM(http_requests.value) + MIN(http_requests.value) + MAX(http_requests.value) + AVG(http_requests.value) |\
\n+------------+-----------------------------------------------------------------------------------------------------------+\
\n| api-server | 1750 |\
\n| app-server | 4550 |\
\n| api-server | 1750.0 |\
\n| app-server | 4550.0 |\
\n+------------+-----------------------------------------------------------------------------------------------------------+",
)
.await;
@@ -399,8 +399,8 @@ async fn two_aggregators_combined_aggrs() {
"+------------+-----------------------------------------------------+\
\n| job | SUM(http_requests.value) + MIN(http_requests.value) |\
\n+------------+-----------------------------------------------------+\
\n| api-server | 1100 |\
\n| app-server | 3100 |\
\n| api-server | 1100.0 |\
\n| app-server | 3100.0 |\
\n+------------+-----------------------------------------------------+",
)
.await;
@@ -444,14 +444,14 @@ async fn binary_op_plain_columns() {
"+------------+----------+------------+---------------------+-------------------------------------------+\
\n| job | instance | group | ts | http_requests.value - http_requests.value |\
\n+------------+----------+------------+---------------------+-------------------------------------------+\
\n| api-server | 0 | canary | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 0 | production | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 1 | canary | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 1 | production | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 0 | canary | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 0 | production | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 1 | canary | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 1 | production | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 0 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| api-server | 0 | production | 1970-01-01T00:00:00 | 0.0 |\
\n| api-server | 1 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| api-server | 1 | production | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 0 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 0 | production | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 1 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 1 | production | 1970-01-01T00:00:00 | 0.0 |\
\n+------------+----------+------------+---------------------+-------------------------------------------+",
)
.await;

View File

@@ -264,7 +264,8 @@ impl Helper {
| ArrowDataType::Dictionary(_, _)
| ArrowDataType::Decimal128(_, _)
| ArrowDataType::Decimal256(_, _)
| ArrowDataType::Map(_, _) => {
| ArrowDataType::Map(_, _)
| ArrowDataType::RunEndEncoded(_, _) => {
unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type())
}
})

View File

@@ -16,6 +16,7 @@ use std::any::Any;
use std::collections::HashSet;
use std::sync::Arc;
use async_trait::async_trait;
use catalog::error::{self as catalog_err, InvalidCatalogValueSnafu, Result as CatalogResult};
use catalog::helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, SchemaKey,
@@ -122,7 +123,7 @@ impl CatalogManager for FrontendCatalogManager {
.schema(schema)
}
fn table(
async fn table(
&self,
catalog: &str,
schema: &str,
@@ -131,6 +132,7 @@ impl CatalogManager for FrontendCatalogManager {
self.schema(catalog, schema)?
.context(catalog::error::SchemaNotFoundSnafu { catalog, schema })?
.table(table_name)
.await
}
}
@@ -255,6 +257,7 @@ pub struct FrontendSchemaProvider {
datanode_clients: Arc<DatanodeClients>,
}
#[async_trait]
impl SchemaProvider for FrontendSchemaProvider {
fn as_any(&self) -> &dyn Any {
self
@@ -284,44 +287,27 @@ impl SchemaProvider for FrontendSchemaProvider {
.unwrap()
}
fn table(&self, name: &str) -> catalog::error::Result<Option<TableRef>> {
async fn table(&self, name: &str) -> catalog::error::Result<Option<TableRef>> {
let table_global_key = TableGlobalKey {
catalog_name: self.catalog_name.clone(),
schema_name: self.schema_name.clone(),
table_name: name.to_string(),
};
let backend = self.backend.clone();
let partition_manager = self.partition_manager.clone();
let datanode_clients = self.datanode_clients.clone();
let table_name = TableName::new(&self.catalog_name, &self.schema_name, name);
let result: CatalogResult<Option<TableRef>> = std::thread::spawn(|| {
common_runtime::block_on_read(async move {
let res = match backend.get(table_global_key.to_string().as_bytes()).await? {
None => {
return Ok(None);
}
Some(r) => r,
};
let val = TableGlobalValue::from_bytes(res.1).context(InvalidCatalogValueSnafu)?;
let table = Arc::new(DistTable::new(
table_name,
Arc::new(
val.table_info
.try_into()
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
),
partition_manager,
datanode_clients,
backend,
));
Ok(Some(table as _))
})
})
.join()
.unwrap();
result
let Some(kv) = self.backend.get(table_global_key.to_string().as_bytes()).await? else { return Ok(None) };
let v = TableGlobalValue::from_bytes(kv.1).context(InvalidCatalogValueSnafu)?;
let table_info = Arc::new(
v.table_info
.try_into()
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
);
let table = Arc::new(DistTable::new(
TableName::new(&self.catalog_name, &self.schema_name, name),
table_info,
self.partition_manager.clone(),
self.datanode_clients.clone(),
self.backend.clone(),
));
Ok(Some(table))
}
fn register_table(
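The replacement above is possible because the catalog traits become async in this upgrade: `SchemaProvider::table` can await the metadata backend directly, so the `std::thread::spawn` + `common_runtime::block_on_read` bridge is no longer needed. A toy sketch of the `async_trait` shape (hypothetical trait and types, not the project's real API):

    use std::sync::Arc;

    use async_trait::async_trait;

    type TableRef = Arc<str>; // stand-in for the real table handle

    #[async_trait]
    trait SchemaProviderSketch: Send + Sync {
        async fn table(&self, name: &str) -> Option<TableRef>;
    }

    struct MetaBackedProvider;

    #[async_trait]
    impl SchemaProviderSketch for MetaBackedProvider {
        async fn table(&self, name: &str) -> Option<TableRef> {
            // a real implementation would await the metadata backend here
            if name == "demo" {
                Some(Arc::from("demo"))
            } else {
                None
            }
        }
    }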

View File

@@ -252,6 +252,7 @@ impl Instance {
let table = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.await
.context(error::CatalogSnafu)?;
match table {
None => {
@@ -485,8 +486,12 @@ impl SqlQueryHandler for Instance {
.and_then(|output| query_interceptor.post_execute(output, query_ctx.clone()))
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
self.sql_handler.do_describe(stmt, query_ctx)
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
self.sql_handler.do_describe(stmt, query_ctx).await
}
fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
@@ -825,7 +830,7 @@ mod tests {
drop_table(instance).await;
verify_table_is_dropped(&distributed);
verify_table_is_dropped(&distributed).await;
}
async fn query(instance: &Instance, sql: &str) -> Output {
@@ -858,14 +863,14 @@ mod tests {
let batches = common_recordbatch::util::collect_batches(s).await.unwrap();
let pretty_print = batches.pretty_print().unwrap();
let expected = "\
+-------+---------------------+-----------+-------------+-----------+
| host | ts | cpu | memory | disk_util |
+-------+---------------------+-----------+-------------+-----------+
| 490 | 2013-12-31T16:00:00 | 0.1 | 1 | 9.9 |
| 550-A | 2022-12-31T16:00:00 | 1 | 100 | 9.9 |
| 550-W | 2023-12-31T16:00:00 | 10000 | 1000000 | 9.9 |
| MOSS | 2043-12-31T16:00:00 | 100000000 | 10000000000 | 9.9 |
+-------+---------------------+-----------+-------------+-----------+";
+-------+---------------------+-------------+-----------+-----------+
| host | ts | cpu | memory | disk_util |
+-------+---------------------+-------------+-----------+-----------+
| 490 | 2013-12-31T16:00:00 | 0.1 | 1.0 | 9.9 |
| 550-A | 2022-12-31T16:00:00 | 1.0 | 100.0 | 9.9 |
| 550-W | 2023-12-31T16:00:00 | 10000.0 | 1000000.0 | 9.9 |
| MOSS | 2043-12-31T16:00:00 | 100000000.0 | 1.0e10 | 9.9 |
+-------+---------------------+-------------+-----------+-----------+";
assert_eq!(pretty_print, expected);
}
@@ -877,6 +882,7 @@ mod tests {
.frontend
.catalog_manager()
.table("greptime", "public", "demo")
.await
.unwrap()
.unwrap();
let table = table.as_any().downcast_ref::<DistTable>().unwrap();
@@ -918,12 +924,15 @@ mod tests {
assert_eq!(x, 1);
}
fn verify_table_is_dropped(instance: &MockDistributedInstance) {
assert!(instance.datanodes.iter().all(|(_, x)| x
.catalog_manager()
.table("greptime", "public", "demo")
.unwrap()
.is_none()))
async fn verify_table_is_dropped(instance: &MockDistributedInstance) {
for (_, dn) in instance.datanodes.iter() {
assert!(dn
.catalog_manager()
.table("greptime", "public", "demo")
.await
.unwrap()
.is_none())
}
}
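The assertion turns into an explicit loop because `CatalogManager::table` is now async, and `.await` cannot appear inside the synchronous closure that `Iterator::all` expects. A toy sketch of the shape (the predicate below is hypothetical):

    // Hypothetical async predicate standing in for `catalog_manager().table(..)` + `is_none()`.
    async fn table_is_dropped(table: &str) -> bool {
        table != "demo"
    }

    async fn verify_all_dropped(tables: &[&str]) {
        // `tables.iter().all(|t| table_is_dropped(t).await)` would not compile,
        // so each lookup is awaited in a plain loop instead.
        for table in tables {
            assert!(table_is_dropped(table).await);
        }
    }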
#[tokio::test(flavor = "multi_thread")]

View File

@@ -24,11 +24,12 @@ use api::v1::{
};
use async_trait::async_trait;
use catalog::helper::{SchemaKey, SchemaValue};
use catalog::{CatalogList, CatalogManager, DeregisterTableRequest, RegisterTableRequest};
use catalog::{CatalogManager, DeregisterTableRequest, RegisterTableRequest};
use chrono::DateTime;
use client::Database;
use common_base::Plugins;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_catalog::format_full_table_name;
use common_error::prelude::BoxedError;
use common_query::Output;
use common_telemetry::{debug, info};
@@ -59,11 +60,10 @@ use table::table::AlterContext;
use crate::catalog::FrontendCatalogManager;
use crate::datanode::DatanodeClients;
use crate::error::{
self, AlterExprToRequestSnafu, CatalogEntrySerdeSnafu, CatalogNotFoundSnafu, CatalogSnafu,
ColumnDataTypeSnafu, DeserializePartitionSnafu, ParseSqlSnafu, PrimaryKeyNotFoundSnafu,
RequestDatanodeSnafu, RequestMetaSnafu, Result, SchemaExistsSnafu, SchemaNotFoundSnafu,
StartMetaClientSnafu, TableAlreadyExistSnafu, TableNotFoundSnafu, TableSnafu,
ToTableInsertRequestSnafu, UnrecognizedTableOptionSnafu,
self, AlterExprToRequestSnafu, CatalogEntrySerdeSnafu, CatalogSnafu, ColumnDataTypeSnafu,
DeserializePartitionSnafu, ParseSqlSnafu, PrimaryKeyNotFoundSnafu, RequestDatanodeSnafu,
RequestMetaSnafu, Result, SchemaExistsSnafu, StartMetaClientSnafu, TableAlreadyExistSnafu,
TableNotFoundSnafu, TableSnafu, ToTableInsertRequestSnafu, UnrecognizedTableOptionSnafu,
};
use crate::expr_factory;
use crate::instance::parse_stmt;
@@ -114,6 +114,7 @@ impl DistInstance {
&table_name.schema_name,
&table_name.table_name,
)
.await
.context(CatalogSnafu)?
.is_some()
{
@@ -215,6 +216,7 @@ impl DistInstance {
&table_name.schema_name,
&table_name.table_name,
)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: table_name.to_string(),
@@ -274,6 +276,7 @@ impl DistInstance {
let plan = self
.query_engine
.statement_to_plan(QueryStatement::Sql(stmt), query_ctx)
.await
.context(error::ExecuteStatementSnafu {})?;
self.query_engine.execute(&plan).await
}
@@ -311,6 +314,7 @@ impl DistInstance {
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: stmt.name().to_string(),
@@ -329,6 +333,7 @@ impl DistInstance {
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name: table })?;
@@ -435,18 +440,11 @@ impl DistInstance {
let table_name = expr.table_name.as_str();
let table = self
.catalog_manager
.catalog(catalog_name)
.context(CatalogSnafu)?
.context(CatalogNotFoundSnafu { catalog_name })?
.schema(schema_name)
.context(CatalogSnafu)?
.context(SchemaNotFoundSnafu {
schema_info: format!("{catalog_name}.{schema_name}"),
})?
.table(table_name)
.table(catalog_name, schema_name, table_name)
.await
.context(CatalogSnafu)?
.context(TableNotFoundSnafu {
table_name: format!("{catalog_name}.{schema_name}.{table_name}"),
table_name: format_full_table_name(catalog_name, schema_name, table_name),
})?;
let request = common_grpc_expr::alter_expr_to_request(expr.clone())
@@ -503,6 +501,7 @@ impl DistInstance {
let table = self
.catalog_manager
.table(catalog, schema, table_name)
.await
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name })?;
@@ -543,10 +542,15 @@ impl SqlQueryHandler for DistInstance {
self.handle_statement(stmt, query_ctx).await
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
if let Statement::Query(_) = stmt {
self.query_engine
.describe(QueryStatement::Sql(stmt), query_ctx)
.await
.map(Some)
.context(error::DescribeStatementSnafu)
} else {

View File

@@ -94,7 +94,7 @@ mod test {
test_handle_ddl_request(frontend.as_ref()).await;
verify_table_is_dropped(&instance);
verify_table_is_dropped(&instance).await;
}
#[tokio::test(flavor = "multi_thread")]
@@ -203,16 +203,19 @@ mod test {
assert!(matches!(output, Output::AffectedRows(1)));
}
fn verify_table_is_dropped(instance: &MockDistributedInstance) {
assert!(instance.datanodes.iter().all(|(_, x)| x
.catalog_manager()
.table(
"greptime",
"database_created_through_grpc",
"table_created_through_grpc"
)
.unwrap()
.is_none()))
async fn verify_table_is_dropped(instance: &MockDistributedInstance) {
for (_, dn) in instance.datanodes.iter() {
assert!(dn
.catalog_manager()
.table(
"greptime",
"database_created_through_grpc",
"table_created_through_grpc"
)
.await
.unwrap()
.is_none());
}
}
#[tokio::test(flavor = "multi_thread")]
@@ -413,6 +416,7 @@ CREATE TABLE {table_name} (
.frontend
.catalog_manager()
.table("greptime", "public", table_name)
.await
.unwrap()
.unwrap();
let table = table.as_any().downcast_ref::<DistTable>().unwrap();

View File

@@ -92,8 +92,8 @@ monitor1,host=host2 memory=1027 1663840496400340001";
+-------------------------+-------+------+--------+
| ts | host | cpu | memory |
+-------------------------+-------+------+--------+
| 2022-09-22T09:54:56.100 | host1 | 66.6 | 1024 |
| 2022-09-22T09:54:56.400 | host2 | | 1027 |
| 2022-09-22T09:54:56.100 | host1 | 66.6 | 1024.0 |
| 2022-09-22T09:54:56.400 | host2 | | 1027.0 |
+-------------------------+-------+------+--------+"
);
}

View File

@@ -113,9 +113,9 @@ mod tests {
"+---------------------+----------------+-------+-------+-------+",
"| greptime_timestamp | greptime_value | tagk1 | tagk2 | tagk3 |",
"+---------------------+----------------+-------+-------+-------+",
"| 1970-01-01T00:00:01 | 1 | tagv1 | tagv2 | |",
"| 1970-01-01T00:00:02 | 2 | | tagv2 | tagv3 |",
"| 1970-01-01T00:00:03 | 3 | | | |",
"| 1970-01-01T00:00:01 | 1.0 | tagv1 | tagv2 | |",
"| 1970-01-01T00:00:02 | 2.0 | | tagv2 | tagv3 |",
"| 1970-01-01T00:00:03 | 3.0 | | | |",
"+---------------------+----------------+-------+-------+-------+",
]
.into_iter()

View File

@@ -68,9 +68,14 @@ impl SqlQueryHandler for StandaloneSqlQueryHandler {
.context(error::InvokeDatanodeSnafu)
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
self.0
.do_describe(stmt, query_ctx)
.await
.context(error::InvokeDatanodeSnafu)
}

View File

@@ -1006,10 +1006,6 @@ mod test {
vec![binary_expr(col("row_id"), Operator::LtEq, lit(123)).into()], // row_id <= 123
vec![0, 1, 2, 3],
);
test(
vec![binary_expr(col("b"), Operator::Like, lit("foo%")).into()], // b LIKE 'foo%'
vec![0, 1, 2, 3],
);
test(
vec![binary_expr(col("c"), Operator::Gt, lit(123)).into()], // c > 789
vec![0, 1, 2, 3],

View File

@@ -18,6 +18,7 @@ use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use common_catalog::format_full_table_name;
use common_error::ext::BoxedError;
use common_procedure::{BoxedProcedure, ProcedureManager};
use common_telemetry::tracing::log::info;
@@ -341,7 +342,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
return Ok(table);
} else {
return TableExistsSnafu {
table_name: format!("{catalog_name}.{schema_name}.{table_name}"),
table_name: format_full_table_name(catalog_name, schema_name, table_name),
}
.fail();
}
@@ -1369,8 +1370,8 @@ mod tests {
+-------+-----+--------+-------------------------+
| host | cpu | memory | ts |
+-------+-----+--------+-------------------------+
| host2 | 2 | 2 | 1970-01-01T00:00:00.002 |
| host4 | 4 | 4 | 1970-01-01T00:00:00.001 |
| host2 | 2.0 | 2.0 | 1970-01-01T00:00:00.002 |
| host4 | 4.0 | 4.0 | 1970-01-01T00:00:00.001 |
+-------+-----+--------+-------------------------+"
);
}

View File

@@ -5,6 +5,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-recursion = "1.0"
async-trait.workspace = true
bytemuck = "1.12"
catalog = { path = "../catalog" }

View File

@@ -88,6 +88,12 @@ pub enum Error {
"Table (metric) name not found, this indicates a procedure error in PromQL planner"
))]
TableNameNotFound { backtrace: Backtrace },
#[snafu(display("General catalog error: {source}"))]
Catalog {
#[snafu(backtrace)]
source: catalog::error::Error,
},
}
impl ErrorExt for Error {
@@ -108,6 +114,8 @@ impl ErrorExt for Error {
| EmptyRange { .. } => StatusCode::Internal,
TableNotFound { .. } | TableNameNotFound { .. } => StatusCode::TableNotFound,
Catalog { source } => source.status_code(),
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
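The new `Catalog` variant follows the snafu pattern used throughout the codebase: wrap the underlying error as `source` and delegate the status code (and backtrace) to it. A self-contained toy sketch of the wrapping, using `std::io::Error` as the source type purely for illustration:

    use snafu::{ResultExt, Snafu};

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("General catalog error: {source}"))]
        Catalog { source: std::io::Error },
    }

    fn lookup_table(path: &str) -> Result<(), Error> {
        // `.context(CatalogSnafu)` converts the io::Error into Error::Catalog via the
        // context selector generated by `#[derive(Snafu)]`.
        std::fs::metadata(path).map(|_| ()).context(CatalogSnafu)
    }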

View File

@@ -22,7 +22,7 @@ use datafusion::arrow::array::{Array, TimestampMillisecondArray, UInt64Array};
use datafusion::arrow::datatypes::SchemaRef;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::common::DFSchemaRef;
use datafusion::error::Result as DataFusionResult;
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::execution::context::TaskContext;
use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
use datafusion::physical_expr::PhysicalSortExpr;
@@ -156,8 +156,8 @@ impl ExecutionPlan for InstantManipulateExec {
self.input.output_ordering()
}
fn maintains_input_order(&self) -> bool {
true
fn maintains_input_order(&self) -> Vec<bool> {
vec![true; self.children().len()]
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
@@ -261,7 +261,7 @@ impl RecordBatchStream for InstantManipulateStream {
}
impl Stream for InstantManipulateStream {
type Item = ArrowResult<RecordBatch>;
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let poll = match self.input.poll_next_unpin(cx) {
@@ -277,7 +277,7 @@ impl Stream for InstantManipulateStream {
impl InstantManipulateStream {
// refer to Go version: https://github.com/prometheus/prometheus/blob/e934d0f01158a1d55fa0ebb035346b195fcc1260/promql/engine.go#L1571
pub fn manipulate(&self, input: RecordBatch) -> ArrowResult<RecordBatch> {
pub fn manipulate(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
let mut take_indices = Vec::with_capacity(input.num_rows());
// TODO(ruihang): maybe the input is not timestamp millisecond array
let ts_column = input
@@ -339,7 +339,7 @@ impl InstantManipulateStream {
record_batch: RecordBatch,
take_indices: Vec<Option<u64>>,
aligned_ts: Vec<Millisecond>,
) -> ArrowResult<RecordBatch> {
) -> DataFusionResult<RecordBatch> {
let aligned_ts = aligned_ts
.into_iter()
.zip(take_indices.iter())
@@ -359,7 +359,8 @@ impl InstantManipulateStream {
.collect::<ArrowResult<Vec<_>>>()?;
arrays[self.time_index] = Arc::new(TimestampMillisecondArray::from(aligned_ts));
let result = RecordBatch::try_new(record_batch.schema(), arrays)?;
let result = RecordBatch::try_new(record_batch.schema(), arrays)
.map_err(DataFusionError::ArrowError)?;
Ok(result)
}
}
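With the stream item type moving from `ArrowResult<RecordBatch>` to `DataFusionResult<RecordBatch>`, Arrow errors raised while rebuilding a batch no longer satisfy the return type on their own; they are lifted with `DataFusionError::ArrowError` as shown above. A minimal sketch of that conversion, assuming the same error types this file imports:

    use datafusion::arrow::error::ArrowError;
    use datafusion::error::{DataFusionError, Result as DataFusionResult};

    fn lift_arrow_error(res: Result<usize, ArrowError>) -> DataFusionResult<usize> {
        // DataFusionError::ArrowError is a tuple variant, so it works directly
        // as the mapping function passed to map_err.
        res.map_err(DataFusionError::ArrowError)
    }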
@@ -436,14 +437,14 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:01:30 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:30 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 310_000, 10_000, 30_000, expected).await;
@@ -455,22 +456,22 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:10 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:00:40 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:01:10 | 1 | foo |\
\n| 1970-01-01T00:01:30 | 1 | foo |\
\n| 1970-01-01T00:01:40 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:02:10 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:03:10 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:10 | 1 | foo |\
\n| 1970-01-01T00:04:40 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:10 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n| 1970-01-01T00:00:40 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:10 | 1.0 | foo |\
\n| 1970-01-01T00:01:30 | 1.0 | foo |\
\n| 1970-01-01T00:01:40 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:02:10 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:10 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:10 | 1.0 | foo |\
\n| 1970-01-01T00:04:40 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 300_000, 10_000, 10_000, expected).await;
@@ -482,17 +483,17 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:01:30 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:02:30 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:03:30 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:30 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:30 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:02:30 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:30 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 300_000, 30_000, 30_000, expected).await;
@@ -504,33 +505,33 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:10 | 1 | foo |\
\n| 1970-01-01T00:00:20 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:00:40 | 1 | foo |\
\n| 1970-01-01T00:00:50 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:01:10 | 1 | foo |\
\n| 1970-01-01T00:01:20 | 1 | foo |\
\n| 1970-01-01T00:01:30 | 1 | foo |\
\n| 1970-01-01T00:01:40 | 1 | foo |\
\n| 1970-01-01T00:01:50 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:02:10 | 1 | foo |\
\n| 1970-01-01T00:02:20 | 1 | foo |\
\n| 1970-01-01T00:02:30 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:03:10 | 1 | foo |\
\n| 1970-01-01T00:03:20 | 1 | foo |\
\n| 1970-01-01T00:03:30 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:10 | 1 | foo |\
\n| 1970-01-01T00:04:20 | 1 | foo |\
\n| 1970-01-01T00:04:30 | 1 | foo |\
\n| 1970-01-01T00:04:40 | 1 | foo |\
\n| 1970-01-01T00:04:50 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:10 | 1.0 | foo |\
\n| 1970-01-01T00:00:20 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n| 1970-01-01T00:00:40 | 1.0 | foo |\
\n| 1970-01-01T00:00:50 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:10 | 1.0 | foo |\
\n| 1970-01-01T00:01:20 | 1.0 | foo |\
\n| 1970-01-01T00:01:30 | 1.0 | foo |\
\n| 1970-01-01T00:01:40 | 1.0 | foo |\
\n| 1970-01-01T00:01:50 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:02:10 | 1.0 | foo |\
\n| 1970-01-01T00:02:20 | 1.0 | foo |\
\n| 1970-01-01T00:02:30 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:10 | 1.0 | foo |\
\n| 1970-01-01T00:03:20 | 1.0 | foo |\
\n| 1970-01-01T00:03:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:10 | 1.0 | foo |\
\n| 1970-01-01T00:04:20 | 1.0 | foo |\
\n| 1970-01-01T00:04:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:40 | 1.0 | foo |\
\n| 1970-01-01T00:04:50 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 300_000, 30_000, 10_000, expected).await;
@@ -542,37 +543,37 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:10 | 1 | foo |\
\n| 1970-01-01T00:00:20 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:00:40 | 1 | foo |\
\n| 1970-01-01T00:00:50 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:01:10 | 1 | foo |\
\n| 1970-01-01T00:01:20 | 1 | foo |\
\n| 1970-01-01T00:01:30 | 1 | foo |\
\n| 1970-01-01T00:01:40 | 1 | foo |\
\n| 1970-01-01T00:01:50 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:02:10 | 1 | foo |\
\n| 1970-01-01T00:02:20 | 1 | foo |\
\n| 1970-01-01T00:02:30 | 1 | foo |\
\n| 1970-01-01T00:02:40 | 1 | foo |\
\n| 1970-01-01T00:02:50 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:03:10 | 1 | foo |\
\n| 1970-01-01T00:03:20 | 1 | foo |\
\n| 1970-01-01T00:03:30 | 1 | foo |\
\n| 1970-01-01T00:03:40 | 1 | foo |\
\n| 1970-01-01T00:03:50 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:10 | 1 | foo |\
\n| 1970-01-01T00:04:20 | 1 | foo |\
\n| 1970-01-01T00:04:30 | 1 | foo |\
\n| 1970-01-01T00:04:40 | 1 | foo |\
\n| 1970-01-01T00:04:50 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:10 | 1.0 | foo |\
\n| 1970-01-01T00:00:20 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n| 1970-01-01T00:00:40 | 1.0 | foo |\
\n| 1970-01-01T00:00:50 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:10 | 1.0 | foo |\
\n| 1970-01-01T00:01:20 | 1.0 | foo |\
\n| 1970-01-01T00:01:30 | 1.0 | foo |\
\n| 1970-01-01T00:01:40 | 1.0 | foo |\
\n| 1970-01-01T00:01:50 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:02:10 | 1.0 | foo |\
\n| 1970-01-01T00:02:20 | 1.0 | foo |\
\n| 1970-01-01T00:02:30 | 1.0 | foo |\
\n| 1970-01-01T00:02:40 | 1.0 | foo |\
\n| 1970-01-01T00:02:50 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:10 | 1.0 | foo |\
\n| 1970-01-01T00:03:20 | 1.0 | foo |\
\n| 1970-01-01T00:03:30 | 1.0 | foo |\
\n| 1970-01-01T00:03:40 | 1.0 | foo |\
\n| 1970-01-01T00:03:50 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:10 | 1.0 | foo |\
\n| 1970-01-01T00:04:20 | 1.0 | foo |\
\n| 1970-01-01T00:04:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:40 | 1.0 | foo |\
\n| 1970-01-01T00:04:50 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 300_000, 60_000, 10_000, expected).await;
@@ -584,17 +585,17 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:01:30 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:02:30 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:03:30 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:30 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:30 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:02:30 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:30 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 300_000, 60_000, 30_000, expected).await;
@@ -606,8 +607,8 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:01 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:01 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(230_000, 245_000, 0, 1_000, expected).await;
@@ -619,9 +620,9 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:00:10 | 1 | foo |\
\n| 1970-01-01T00:00:30 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:00:10 | 1.0 | foo |\
\n| 1970-01-01T00:00:30 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(0, 30_000, 10_000, 10_000, expected).await;
@@ -633,12 +634,12 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 1 | foo |\
\n| 1970-01-01T00:01:00 | 1 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n| 1970-01-01T00:03:00 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:00:00 | 1.0 | foo |\
\n| 1970-01-01T00:01:00 | 1.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n| 1970-01-01T00:03:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(-900_000, 900_000, 30_000, 60_000, expected).await;
@@ -650,16 +651,16 @@ mod test {
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:03:10 | 1 | foo |\
\n| 1970-01-01T00:03:20 | 1 | foo |\
\n| 1970-01-01T00:03:30 | 1 | foo |\
\n| 1970-01-01T00:04:00 | 1 | foo |\
\n| 1970-01-01T00:04:10 | 1 | foo |\
\n| 1970-01-01T00:04:20 | 1 | foo |\
\n| 1970-01-01T00:04:30 | 1 | foo |\
\n| 1970-01-01T00:04:40 | 1 | foo |\
\n| 1970-01-01T00:04:50 | 1 | foo |\
\n| 1970-01-01T00:05:00 | 1 | foo |\
\n| 1970-01-01T00:03:10 | 1.0 | foo |\
\n| 1970-01-01T00:03:20 | 1.0 | foo |\
\n| 1970-01-01T00:03:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:00 | 1.0 | foo |\
\n| 1970-01-01T00:04:10 | 1.0 | foo |\
\n| 1970-01-01T00:04:20 | 1.0 | foo |\
\n| 1970-01-01T00:04:30 | 1.0 | foo |\
\n| 1970-01-01T00:04:40 | 1.0 | foo |\
\n| 1970-01-01T00:04:50 | 1.0 | foo |\
\n| 1970-01-01T00:05:00 | 1.0 | foo |\
\n+---------------------+-------+------+",
);
do_normalize_test(190_000, 300_000, 30_000, 10_000, expected).await;

View File

@@ -20,6 +20,7 @@ use std::task::{Context, Poll};
use datafusion::arrow::array::{BooleanArray, Float64Array};
use datafusion::arrow::compute;
use datafusion::common::{DFSchemaRef, Result as DataFusionResult, Statistics};
use datafusion::error::DataFusionError;
use datafusion::execution::context::TaskContext;
use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode};
use datafusion::physical_expr::PhysicalSortExpr;
@@ -139,10 +140,6 @@ impl ExecutionPlan for SeriesNormalizeExec {
self.input.output_ordering()
}
fn maintains_input_order(&self) -> bool {
false
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
vec![self.input.clone()]
}
@@ -214,7 +211,7 @@ pub struct SeriesNormalizeStream {
}
impl SeriesNormalizeStream {
pub fn normalize(&self, input: RecordBatch) -> ArrowResult<RecordBatch> {
pub fn normalize(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
// TODO(ruihang): maybe the input is not timestamp millisecond array
let ts_column = input
.column(self.time_index)
@@ -254,7 +251,8 @@ impl SeriesNormalizeStream {
}
}
let result = compute::filter_record_batch(&ordered_batch, &BooleanArray::from(filter))?;
let result = compute::filter_record_batch(&ordered_batch, &BooleanArray::from(filter))
.map_err(DataFusionError::ArrowError)?;
Ok(result)
}
}
@@ -266,7 +264,7 @@ impl RecordBatchStream for SeriesNormalizeStream {
}
impl Stream for SeriesNormalizeStream {
type Item = ArrowResult<RecordBatch>;
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let poll = match self.input.poll_next_unpin(cx) {
@@ -335,15 +333,15 @@ mod test {
.to_string();
let expected = String::from(
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1970-01-01T00:00:00 | 10 | foo |\
\n| 1970-01-01T00:00:30 | 100 | foo |\
\n| 1970-01-01T00:01:00 | 0 | foo |\
\n| 1970-01-01T00:01:30 | 1000 | foo |\
\n| 1970-01-01T00:02:00 | 1 | foo |\
\n+---------------------+-------+------+",
"+---------------------+--------+------+\
\n| timestamp | value | path |\
\n+---------------------+--------+------+\
\n| 1970-01-01T00:00:00 | 10.0 | foo |\
\n| 1970-01-01T00:00:30 | 100.0 | foo |\
\n| 1970-01-01T00:01:00 | 0.0 | foo |\
\n| 1970-01-01T00:01:30 | 1000.0 | foo |\
\n| 1970-01-01T00:02:00 | 1.0 | foo |\
\n+---------------------+--------+------+",
);
assert_eq!(result_literal, expected);
@@ -367,15 +365,15 @@ mod test {
.to_string();
let expected = String::from(
"+---------------------+-------+------+\
\n| timestamp | value | path |\
\n+---------------------+-------+------+\
\n| 1969-12-31T23:59:59 | 10 | foo |\
\n| 1970-01-01T00:00:29 | 100 | foo |\
\n| 1970-01-01T00:00:59 | 0 | foo |\
\n| 1970-01-01T00:01:29 | 1000 | foo |\
\n| 1970-01-01T00:01:59 | 1 | foo |\
\n+---------------------+-------+------+",
"+---------------------+--------+------+\
\n| timestamp | value | path |\
\n+---------------------+--------+------+\
\n| 1969-12-31T23:59:59 | 10.0 | foo |\
\n| 1970-01-01T00:00:29 | 100.0 | foo |\
\n| 1970-01-01T00:00:59 | 0.0 | foo |\
\n| 1970-01-01T00:01:29 | 1000.0 | foo |\
\n| 1970-01-01T00:01:59 | 1.0 | foo |\
\n+---------------------+--------+------+",
);
assert_eq!(result_literal, expected);

View File

@@ -24,7 +24,7 @@ use datafusion::arrow::datatypes::SchemaRef;
use datafusion::arrow::error::ArrowError;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::common::{DFField, DFSchema, DFSchemaRef};
use datafusion::error::Result as DataFusionResult;
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::execution::context::TaskContext;
use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
use datafusion::physical_expr::PhysicalSortExpr;
@@ -33,7 +33,6 @@ use datafusion::physical_plan::{
DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream,
Statistics,
};
use datatypes::arrow::error::Result as ArrowResult;
use futures::{Stream, StreamExt};
use crate::extension_plan::Millisecond;
@@ -97,7 +96,9 @@ impl RangeManipulate {
// process time index column
// the raw timestamp field is preserved, and a new timestamp_range field is appended at the end.
let index = input_schema.index_of_column_by_name(None, time_index)?;
let Some(index) = input_schema.index_of_column_by_name(None, time_index)? else {
return Err(datafusion::common::field_not_found(None, time_index, input_schema.as_ref()))
};
let timestamp_range_field = columns[index]
.field()
.clone()
@@ -108,7 +109,9 @@ impl RangeManipulate {
// process value columns
for name in value_columns {
let index = input_schema.index_of_column_by_name(None, name)?;
let Some(index) = input_schema.index_of_column_by_name(None, name)? else {
return Err(datafusion::common::field_not_found(None, name, input_schema.as_ref()))
};
columns[index] = DFField::from(RangeArray::convert_field(columns[index].field()));
}
@@ -211,8 +214,8 @@ impl ExecutionPlan for RangeManipulateExec {
self.input.output_ordering()
}
fn maintains_input_order(&self) -> bool {
true
fn maintains_input_order(&self) -> Vec<bool> {
vec![true; self.children().len()]
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
@@ -330,7 +333,7 @@ impl RecordBatchStream for RangeManipulateStream {
}
impl Stream for RangeManipulateStream {
type Item = ArrowResult<RecordBatch>;
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let poll = match self.input.poll_next_unpin(cx) {
@@ -348,7 +351,7 @@ impl RangeManipulateStream {
// Prometheus: https://github.com/prometheus/prometheus/blob/e934d0f01158a1d55fa0ebb035346b195fcc1260/promql/engine.go#L1113-L1198
// But they are not exactly the same, because we don't eager-evaluate on the data in this plan.
// And the generated timestamps are not aligned to the step; that alignment is expected to be done later.
pub fn manipulate(&self, input: RecordBatch) -> ArrowResult<RecordBatch> {
pub fn manipulate(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
let mut other_columns = (0..input.columns().len()).collect::<HashSet<_>>();
// calculate the range
let (aligned_ts, ranges) = self.calculate_range(&input);
@@ -382,6 +385,7 @@ impl RangeManipulateStream {
new_columns[self.time_index] = aligned_ts;
RecordBatch::try_new(self.output_schema.clone(), new_columns)
.map_err(DataFusionError::ArrowError)
}
fn calculate_range(&self, input: &RecordBatch) -> (ArrayRef, Vec<(u32, u32)>) {

View File

@@ -31,7 +31,6 @@ use datafusion::physical_plan::{
Statistics,
};
use datatypes::arrow::compute;
use datatypes::arrow::error::Result as ArrowResult;
use futures::{ready, Stream, StreamExt};
#[derive(Debug)]
@@ -113,8 +112,8 @@ impl ExecutionPlan for SeriesDivideExec {
self.input.output_ordering()
}
fn maintains_input_order(&self) -> bool {
true
fn maintains_input_order(&self) -> Vec<bool> {
vec![true; self.children().len()]
}
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
@@ -200,7 +199,7 @@ impl RecordBatchStream for SeriesDivideStream {
}
impl Stream for SeriesDivideStream {
type Item = ArrowResult<RecordBatch>;
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
loop {
@@ -242,7 +241,7 @@ impl SeriesDivideStream {
fn fetch_next_batch(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<ArrowResult<RecordBatch>>> {
) -> Poll<Option<DataFusionResult<RecordBatch>>> {
let poll = match self.input.poll_next_unpin(cx) {
Poll::Ready(batch) => {
let _timer = self.metric.elapsed_compute().timer();

View File

@@ -17,7 +17,9 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::UNIX_EPOCH;
use datafusion::common::{DFSchemaRef, Result as DfResult};
use async_recursion::async_recursion;
use catalog::table_source::DfTableSourceProvider;
use datafusion::common::{DFSchemaRef, OwnedTableReference, Result as DfResult};
use datafusion::datasource::DefaultTableSource;
use datafusion::logical_expr::expr::AggregateFunction;
use datafusion::logical_expr::expr_rewriter::normalize_cols;
@@ -28,8 +30,6 @@ use datafusion::logical_expr::{
use datafusion::optimizer::utils;
use datafusion::prelude::{Column, Expr as DfExpr, JoinType};
use datafusion::scalar::ScalarValue;
use datafusion::sql::planner::ContextProvider;
use datafusion::sql::TableReference;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use promql_parser::label::{MatchOp, Matchers, METRIC_NAME};
use promql_parser::parser::{
@@ -41,8 +41,8 @@ use snafu::{ensure, OptionExt, ResultExt};
use table::table::adapter::DfTableProviderAdapter;
use crate::error::{
DataFusionPlanningSnafu, ExpectExprSnafu, MultipleVectorSnafu, Result, TableNameNotFoundSnafu,
TableNotFoundSnafu, TimeIndexNotFoundSnafu, UnexpectedTokenSnafu, UnknownTableSnafu,
CatalogSnafu, DataFusionPlanningSnafu, ExpectExprSnafu, MultipleVectorSnafu, Result,
TableNameNotFoundSnafu, TimeIndexNotFoundSnafu, UnexpectedTokenSnafu, UnknownTableSnafu,
UnsupportedExprSnafu, ValueNotFoundSnafu,
};
use crate::extension_plan::{
@@ -79,21 +79,25 @@ impl PromPlannerContext {
}
}
pub struct PromPlanner<S: ContextProvider> {
schema_provider: S,
pub struct PromPlanner {
table_provider: DfTableSourceProvider,
ctx: PromPlannerContext,
}
impl<S: ContextProvider> PromPlanner<S> {
pub fn stmt_to_plan(stmt: EvalStmt, schema_provider: S) -> Result<LogicalPlan> {
impl PromPlanner {
pub async fn stmt_to_plan(
table_provider: DfTableSourceProvider,
stmt: EvalStmt,
) -> Result<LogicalPlan> {
let mut planner = Self {
schema_provider,
table_provider,
ctx: PromPlannerContext::from_eval_stmt(&stmt),
};
planner.prom_expr_to_plan(stmt.expr)
planner.prom_expr_to_plan(stmt.expr).await
}
pub fn prom_expr_to_plan(&mut self, prom_expr: PromExpr) -> Result<LogicalPlan> {
#[async_recursion]
pub async fn prom_expr_to_plan(&mut self, prom_expr: PromExpr) -> Result<LogicalPlan> {
let res = match &prom_expr {
PromExpr::Aggregate(AggregateExpr {
op,
@@ -102,7 +106,7 @@ impl<S: ContextProvider> PromPlanner<S> {
param: _param,
modifier,
}) => {
let input = self.prom_expr_to_plan(*expr.clone())?;
let input = self.prom_expr_to_plan(*expr.clone()).await?;
// calculate columns to group by
// Need to append time index column into group by columns
@@ -133,7 +137,7 @@ impl<S: ContextProvider> PromPlanner<S> {
}
PromExpr::Unary(UnaryExpr { expr }) => {
// Unary Expr in PromQL implies the `-` operator
let input = self.prom_expr_to_plan(*expr.clone())?;
let input = self.prom_expr_to_plan(*expr.clone()).await?;
self.projection_for_each_value_column(input, |col| {
Ok(DfExpr::Negative(Box::new(DfExpr::Column(col.into()))))
})?
@@ -166,7 +170,7 @@ impl<S: ContextProvider> PromPlanner<S> {
.fail()?,
// lhs is a literal, rhs is a column
(Some(expr), None) => {
let input = self.prom_expr_to_plan(*rhs.clone())?;
let input = self.prom_expr_to_plan(*rhs.clone()).await?;
let bin_expr_builder = |col: &String| {
let mut binary_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(expr.clone()),
@@ -189,7 +193,7 @@ impl<S: ContextProvider> PromPlanner<S> {
}
// lhs is a column, rhs is a literal
(None, Some(expr)) => {
let input = self.prom_expr_to_plan(*lhs.clone())?;
let input = self.prom_expr_to_plan(*lhs.clone()).await?;
let bin_expr_builder = |col: &String| {
let mut binary_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(DfExpr::Column(col.into())),
@@ -212,11 +216,11 @@ impl<S: ContextProvider> PromPlanner<S> {
}
// both are columns. join them on time index
(None, None) => {
let left_input = self.prom_expr_to_plan(*lhs.clone())?;
let left_input = self.prom_expr_to_plan(*lhs.clone()).await?;
let left_value_columns = self.ctx.value_columns.clone();
let left_schema = left_input.schema().clone();
let right_input = self.prom_expr_to_plan(*rhs.clone())?;
let right_input = self.prom_expr_to_plan(*rhs.clone()).await?;
let right_value_columns = self.ctx.value_columns.clone();
let right_schema = right_input.schema().clone();
@@ -256,7 +260,7 @@ impl<S: ContextProvider> PromPlanner<S> {
}
}
}
PromExpr::Paren(ParenExpr { expr }) => self.prom_expr_to_plan(*expr.clone())?,
PromExpr::Paren(ParenExpr { expr }) => self.prom_expr_to_plan(*expr.clone()).await?,
PromExpr::Subquery(SubqueryExpr { .. }) => UnsupportedExprSnafu {
name: "Prom Subquery",
}
@@ -276,8 +280,10 @@ impl<S: ContextProvider> PromPlanner<S> {
at: _,
}) => {
let matchers = self.preprocess_label_matchers(matchers)?;
self.setup_context()?;
let normalize = self.selector_to_series_normalize_plan(offset, matchers)?;
self.setup_context().await?;
let normalize = self
.selector_to_series_normalize_plan(offset, matchers)
.await?;
let manipulate = InstantManipulate::new(
self.ctx.start,
self.ctx.end,
@@ -301,8 +307,10 @@ impl<S: ContextProvider> PromPlanner<S> {
offset, matchers, ..
} = vector_selector;
let matchers = self.preprocess_label_matchers(matchers)?;
self.setup_context()?;
let normalize = self.selector_to_series_normalize_plan(offset, matchers)?;
self.setup_context().await?;
let normalize = self
.selector_to_series_normalize_plan(offset, matchers)
.await?;
let manipulate = RangeManipulate::new(
self.ctx.start,
self.ctx.end,
@@ -324,10 +332,11 @@ impl<S: ContextProvider> PromPlanner<S> {
}
PromExpr::Call(Call { func, args }) => {
let args = self.create_function_args(&args.args)?;
let input =
self.prom_expr_to_plan(args.input.with_context(|| ExpectExprSnafu {
let input = self
.prom_expr_to_plan(args.input.with_context(|| ExpectExprSnafu {
expr: prom_expr.clone(),
})?)?;
})?)
.await?;
let mut func_exprs = self.create_function_expr(func, args.literals)?;
func_exprs.insert(0, self.create_time_index_column_expr()?);
func_exprs.extend_from_slice(&self.create_tag_column_exprs()?);
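Making `prom_expr_to_plan` an `async fn` turns it into a recursive async function, which Rust cannot express without boxing the future; the `#[async_recursion]` attribute from the `async-recursion` crate generates that boxing. A toy sketch of the same pattern on a miniature expression walker:

    use async_recursion::async_recursion;

    enum Expr {
        Literal(f64),
        Negative(Box<Expr>),
    }

    #[async_recursion]
    async fn eval(expr: &Expr) -> f64 {
        match expr {
            Expr::Literal(v) => *v,
            // the recursive `.await` is what requires the boxed future
            Expr::Negative(inner) => -eval(inner).await,
        }
    }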
@@ -358,8 +367,8 @@ impl<S: ContextProvider> PromPlanner<S> {
Ok(Matchers { matchers })
}
fn selector_to_series_normalize_plan(
&self,
async fn selector_to_series_normalize_plan(
&mut self,
offset: &Option<Offset>,
label_matchers: Matchers,
) -> Result<LogicalPlan> {
@@ -383,7 +392,9 @@ impl<S: ContextProvider> PromPlanner<S> {
)));
// make table scan with filter exprs
let table_scan = self.create_table_scan_plan(&table_name, filters.clone())?;
let table_scan = self
.create_table_scan_plan(&table_name, filters.clone())
.await?;
// make filter and sort plan
let sort_plan = LogicalPlanBuilder::from(table_scan)
@@ -508,12 +519,19 @@ impl<S: ContextProvider> PromPlanner<S> {
Ok(exprs)
}
fn create_table_scan_plan(&self, table_name: &str, filter: Vec<DfExpr>) -> Result<LogicalPlan> {
let table_ref = TableReference::Bare { table: table_name };
async fn create_table_scan_plan(
&mut self,
table_name: &str,
filter: Vec<DfExpr>,
) -> Result<LogicalPlan> {
let table_ref = OwnedTableReference::Bare {
table: table_name.to_string(),
};
let provider = self
.schema_provider
.get_table_provider(table_ref)
.context(TableNotFoundSnafu { table: table_name })?;
.table_provider
.resolve_table(table_ref)
.await
.context(CatalogSnafu)?;
let result = LogicalPlanBuilder::scan_with_filters(table_name, provider, None, filter)
.context(DataFusionPlanningSnafu)?
.build()
@@ -522,16 +540,19 @@ impl<S: ContextProvider> PromPlanner<S> {
}
/// Setup [PromPlannerContext]'s state fields.
fn setup_context(&mut self) -> Result<()> {
async fn setup_context(&mut self) -> Result<()> {
let table_name = self
.ctx
.table_name
.clone()
.context(TableNameNotFoundSnafu)?;
let table = self
.schema_provider
.get_table_provider(TableReference::Bare { table: &table_name })
.context(TableNotFoundSnafu { table: &table_name })?
.table_provider
.resolve_table(OwnedTableReference::Bare {
table: table_name.to_string(),
})
.await
.context(CatalogSnafu)?
.as_any()
.downcast_ref::<DefaultTableSource>()
.context(UnknownTableSnafu)?
@@ -980,19 +1001,17 @@ mod test {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use promql_parser::parser;
use query::query_engine::QueryEngineState;
use query::DfContextProviderAdapter;
use session::context::QueryContext;
use table::metadata::{TableInfoBuilder, TableMetaBuilder};
use table::test_util::EmptyTable;
use super::*;
async fn build_test_context_provider(
async fn build_test_table_provider(
table_name: String,
num_tag: usize,
num_field: usize,
) -> DfContextProviderAdapter {
) -> DfTableSourceProvider {
let mut columns = vec![];
for i in 0..num_tag {
columns.push(ColumnSchema::new(
@@ -1041,10 +1060,7 @@ mod test {
})
.await
.unwrap();
let query_engine_state = QueryEngineState::new(catalog_list, Default::default());
let query_context = QueryContext::new();
DfContextProviderAdapter::new(query_engine_state, query_context.into())
DfTableSourceProvider::new(catalog_list, false, &QueryContext::new())
}
// {
@@ -1075,8 +1091,10 @@ mod test {
lookback_delta: Duration::from_secs(1),
};
let context_provider = build_test_context_provider("some_metric".to_string(), 1, 1).await;
let plan = PromPlanner::stmt_to_plan(eval_stmt, context_provider).unwrap();
let table_provider = build_test_table_provider("some_metric".to_string(), 1, 1).await;
let plan = PromPlanner::stmt_to_plan(table_provider, eval_stmt)
.await
.unwrap();
let expected = String::from(
"Filter: TEMPLATE(field_0) IS NOT NULL [timestamp:Timestamp(Millisecond, None), TEMPLATE(field_0):Float64;N, tag_0:Utf8]\
@@ -1278,8 +1296,10 @@ mod test {
};
// test group by
let context_provider = build_test_context_provider("some_metric".to_string(), 2, 2).await;
let plan = PromPlanner::stmt_to_plan(eval_stmt.clone(), context_provider).unwrap();
let table_provider = build_test_table_provider("some_metric".to_string(), 2, 2).await;
let plan = PromPlanner::stmt_to_plan(table_provider, eval_stmt.clone())
.await
.unwrap();
let expected_no_without = String::from(
"Sort: some_metric.tag_1 ASC NULLS LAST, some_metric.timestamp ASC NULLS LAST [tag_1:Utf8, timestamp:Timestamp(Millisecond, None), TEMPLATE(some_metric.field_0):Float64;N, TEMPLATE(some_metric.field_1):Float64;N]\
\n Aggregate: groupBy=[[some_metric.tag_1, some_metric.timestamp]], aggr=[[TEMPLATE(some_metric.field_0), TEMPLATE(some_metric.field_1)]] [tag_1:Utf8, timestamp:Timestamp(Millisecond, None), TEMPLATE(some_metric.field_0):Float64;N, TEMPLATE(some_metric.field_1):Float64;N]\
@@ -1301,8 +1321,10 @@ mod test {
vec![String::from("tag_1")].into_iter().collect(),
));
}
let context_provider = build_test_context_provider("some_metric".to_string(), 2, 2).await;
let plan = PromPlanner::stmt_to_plan(eval_stmt, context_provider).unwrap();
let table_provider = build_test_table_provider("some_metric".to_string(), 2, 2).await;
let plan = PromPlanner::stmt_to_plan(table_provider, eval_stmt)
.await
.unwrap();
let expected_without = String::from(
"Sort: some_metric.tag_0 ASC NULLS LAST, some_metric.timestamp ASC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), TEMPLATE(some_metric.field_0):Float64;N, TEMPLATE(some_metric.field_1):Float64;N]\
\n Aggregate: groupBy=[[some_metric.tag_0, some_metric.timestamp]], aggr=[[TEMPLATE(some_metric.field_0), TEMPLATE(some_metric.field_1)]] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), TEMPLATE(some_metric.field_0):Float64;N, TEMPLATE(some_metric.field_1):Float64;N]\
@@ -1419,8 +1441,10 @@ mod test {
lookback_delta: Duration::from_secs(1),
};
let context_provider = build_test_context_provider("some_metric".to_string(), 1, 1).await;
let plan = PromPlanner::stmt_to_plan(eval_stmt, context_provider).unwrap();
let table_provider = build_test_table_provider("some_metric".to_string(), 1, 1).await;
let plan = PromPlanner::stmt_to_plan(table_provider, eval_stmt)
.await
.unwrap();
let expected = String::from(
"Projection: lhs.tag_0, lhs.timestamp, some_metric.field_0 + some_metric.field_0 AS some_metric.field_0 + some_metric.field_0 [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), some_metric.field_0 + some_metric.field_0:Float64;N]\
@@ -1455,8 +1479,10 @@ mod test {
lookback_delta: Duration::from_secs(1),
};
let context_provider = build_test_context_provider("some_metric".to_string(), 1, 1).await;
let plan = PromPlanner::stmt_to_plan(eval_stmt, context_provider).unwrap();
let table_provider = build_test_table_provider("some_metric".to_string(), 1, 1).await;
let plan = PromPlanner::stmt_to_plan(table_provider, eval_stmt)
.await
.unwrap();
assert_eq!(plan.display_indent_schema().to_string(), expected);
}
@@ -1528,6 +1554,7 @@ mod test {
}
#[tokio::test]
#[should_panic]
async fn increase_aggr() {
let query = "increase(some_metric[5m])";
let expected = String::from(

View File

@@ -6,6 +6,7 @@ license.workspace = true
[dependencies]
arc-swap = "1.0"
arrow-schema.workspace = true
async-trait = "0.1"
catalog = { path = "../catalog" }
chrono.workspace = true

View File

@@ -21,6 +21,7 @@ mod planner;
use std::sync::Arc;
use async_trait::async_trait;
use catalog::table_source::DfTableSourceProvider;
use catalog::CatalogListRef;
use common_base::Plugins;
use common_error::prelude::BoxedError;
@@ -35,6 +36,7 @@ use common_recordbatch::{EmptyRecordBatchStream, SendableRecordBatchStream};
use common_telemetry::timer;
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_sql::planner::{ParserOptions, SqlToRel};
use datatypes::schema::Schema;
use promql::planner::PromPlanner;
use promql_parser::parser::EvalStmt;
@@ -44,15 +46,15 @@ use sql::statements::statement::Statement;
pub use crate::datafusion::catalog_adapter::DfCatalogListAdapter;
pub use crate::datafusion::planner::DfContextProviderAdapter;
use crate::datafusion::planner::DfPlanner;
use crate::error::{QueryExecutionSnafu, QueryPlanSnafu, Result};
use crate::error::{
DataFusionSnafu, PlanSqlSnafu, QueryExecutionSnafu, QueryPlanSnafu, Result, SqlSnafu,
};
use crate::executor::QueryExecutor;
use crate::logical_optimizer::LogicalOptimizer;
use crate::parser::QueryStatement;
use crate::physical_optimizer::PhysicalOptimizer;
use crate::physical_planner::PhysicalPlanner;
use crate::plan::LogicalPlan;
use crate::planner::Planner;
use crate::query_engine::{QueryEngineContext, QueryEngineState};
use crate::{metric, QueryEngine};
@@ -67,19 +69,54 @@ impl DatafusionQueryEngine {
}
}
fn plan_sql_stmt(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
let context_provider = DfContextProviderAdapter::new(self.state.clone(), query_ctx);
let planner = DfPlanner::new(&context_provider);
planner
.statement_to_plan(stmt)
.map_err(BoxedError::new)
.context(QueryPlanSnafu)
async fn plan_sql_stmt(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<LogicalPlan> {
let session_state = self.state.session_state();
let df_stmt = (&stmt).try_into().context(SqlSnafu)?;
let config_options = session_state.config().config_options();
let parser_options = ParserOptions {
enable_ident_normalization: config_options.sql_parser.enable_ident_normalization,
parse_float_as_decimal: config_options.sql_parser.parse_float_as_decimal,
};
let context_provider = DfContextProviderAdapter::try_new(
self.state.clone(),
session_state,
&df_stmt,
query_ctx,
)
.await?;
let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
let result = sql_to_rel.statement_to_plan(df_stmt).with_context(|_| {
let sql = if let Statement::Query(query) = stmt {
query.inner.to_string()
} else {
format!("{stmt:?}")
};
PlanSqlSnafu { sql }
})?;
Ok(LogicalPlan::DfPlan(result))
}
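Condensed, the new SQL planning path is: convert the GreptimeDB Statement into a DataFusion statement, derive ParserOptions from the session's ConfigOptions so identifier normalization and float parsing follow the engine configuration, then let SqlToRel build the plan against a ContextProvider. A sketch, assuming the DfContextProviderAdapter constructed above (or any other ContextProvider) is already in hand:

    use datafusion_sql::planner::{ParserOptions, SqlToRel};

    // `session_state`, `df_stmt` and `context_provider` are the values prepared in the
    // function body above; this only restates the planning call itself.
    let config_options = session_state.config().config_options();
    let parser_options = ParserOptions {
        enable_ident_normalization: config_options.sql_parser.enable_ident_normalization,
        parse_float_as_decimal: config_options.sql_parser.parse_float_as_decimal,
    };
    let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
    let df_plan = sql_to_rel.statement_to_plan(df_stmt)?; // a datafusion_expr::LogicalPlan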
// TODO(ruihang): test this method once parser is ready.
fn plan_promql_stmt(&self, stmt: EvalStmt, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
let context_provider = DfContextProviderAdapter::new(self.state.clone(), query_ctx);
PromPlanner::stmt_to_plan(stmt, context_provider)
async fn plan_promql_stmt(
&self,
stmt: EvalStmt,
query_ctx: QueryContextRef,
) -> Result<LogicalPlan> {
let table_provider = DfTableSourceProvider::new(
self.state.catalog_list().clone(),
self.state.disallow_cross_schema_query(),
query_ctx.as_ref(),
);
PromPlanner::stmt_to_plan(table_provider, stmt)
.await
.map(LogicalPlan::DfPlan)
.map_err(BoxedError::new)
.context(QueryPlanSnafu)
@@ -93,28 +130,28 @@ impl QueryEngine for DatafusionQueryEngine {
"datafusion"
}
fn statement_to_plan(
async fn statement_to_plan(
&self,
stmt: QueryStatement,
query_ctx: QueryContextRef,
) -> Result<LogicalPlan> {
match stmt {
QueryStatement::Sql(stmt) => self.plan_sql_stmt(stmt, query_ctx),
QueryStatement::Promql(stmt) => self.plan_promql_stmt(stmt, query_ctx),
QueryStatement::Sql(stmt) => self.plan_sql_stmt(stmt, query_ctx).await,
QueryStatement::Promql(stmt) => self.plan_promql_stmt(stmt, query_ctx).await,
}
}
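On the PromQL side, the planner no longer goes through a DataFusion ContextProvider at all: plan_promql_stmt above hands PromPlanner a DfTableSourceProvider built from the catalog list, the cross-schema flag, and the query context, and the planner resolves tables itself. A sketch of that path, with `state` standing for the QueryEngineState and `query_ctx` for the request's QueryContextRef:

    use catalog::table_source::DfTableSourceProvider;

    // Condensed from plan_promql_stmt above; `eval_stmt` is the parsed PromQL EvalStmt.
    let table_provider = DfTableSourceProvider::new(
        state.catalog_list().clone(),
        state.disallow_cross_schema_query(),
        query_ctx.as_ref(),
    );
    let df_plan = PromPlanner::stmt_to_plan(table_provider, eval_stmt).await?;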
fn describe(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<Schema> {
async fn describe(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<Schema> {
// TODO(sunng87): consider cache optmised logical plan between describe
// and execute
let plan = self.statement_to_plan(stmt, query_ctx)?;
let mut ctx = QueryEngineContext::new(self.state.clone());
let plan = self.statement_to_plan(stmt, query_ctx).await?;
let mut ctx = QueryEngineContext::new(self.state.session_state());
let optimised_plan = self.optimize_logical_plan(&mut ctx, &plan)?;
optimised_plan.schema()
}
async fn execute(&self, plan: &LogicalPlan) -> Result<Output> {
let mut ctx = QueryEngineContext::new(self.state.clone());
let mut ctx = QueryEngineContext::new(self.state.session_state());
let logical_plan = self.optimize_logical_plan(&mut ctx, plan)?;
let physical_plan = self.create_physical_plan(&mut ctx, &logical_plan).await?;
let physical_plan = self.optimize_physical_plan(&mut ctx, physical_plan)?;
@@ -123,7 +160,7 @@ impl QueryEngine for DatafusionQueryEngine {
}
async fn execute_physical(&self, plan: &Arc<dyn PhysicalPlan>) -> Result<Output> {
let ctx = QueryEngineContext::new(self.state.clone());
let ctx = QueryEngineContext::new(self.state.session_state());
Ok(Output::Stream(self.execute_stream(&ctx, plan)?))
}
@@ -150,14 +187,14 @@ impl QueryEngine for DatafusionQueryEngine {
impl LogicalOptimizer for DatafusionQueryEngine {
fn optimize_logical_plan(
&self,
_: &mut QueryEngineContext,
ctx: &mut QueryEngineContext,
plan: &LogicalPlan,
) -> Result<LogicalPlan> {
let _timer = timer!(metric::METRIC_OPTIMIZE_LOGICAL_ELAPSED);
match plan {
LogicalPlan::DfPlan(df_plan) => {
let optimized_plan = self
.state
let state = ctx.state();
let optimized_plan = state
.optimize(df_plan)
.context(error::DatafusionSnafu {
msg: "Fail to optimize logical plan",
@@ -175,14 +212,14 @@ impl LogicalOptimizer for DatafusionQueryEngine {
impl PhysicalPlanner for DatafusionQueryEngine {
async fn create_physical_plan(
&self,
_: &mut QueryEngineContext,
ctx: &mut QueryEngineContext,
logical_plan: &LogicalPlan,
) -> Result<Arc<dyn PhysicalPlan>> {
let _timer = timer!(metric::METRIC_CREATE_PHYSICAL_ELAPSED);
match logical_plan {
LogicalPlan::DfPlan(df_plan) => {
let physical_plan = self
.state
let state = ctx.state();
let physical_plan = state
.create_physical_plan(df_plan)
.await
.context(error::DatafusionSnafu {
@@ -210,12 +247,12 @@ impl PhysicalPlanner for DatafusionQueryEngine {
impl PhysicalOptimizer for DatafusionQueryEngine {
fn optimize_physical_plan(
&self,
_: &mut QueryEngineContext,
ctx: &mut QueryEngineContext,
plan: Arc<dyn PhysicalPlan>,
) -> Result<Arc<dyn PhysicalPlan>> {
let _timer = timer!(metric::METRIC_OPTIMIZE_PHYSICAL_ELAPSED);
let new_plan = plan
let mut new_plan = plan
.as_any()
.downcast_ref::<PhysicalPlanAdapter>()
.context(error::PhysicalPlanDowncastSnafu)
@@ -223,14 +260,13 @@ impl PhysicalOptimizer for DatafusionQueryEngine {
.context(QueryExecutionSnafu)?
.df_plan();
let new_plan = self
.state
.optimize_physical_plan(new_plan)
.context(error::DatafusionSnafu {
msg: "Fail to optimize physical plan",
})
.map_err(BoxedError::new)
.context(QueryExecutionSnafu)?;
let state = ctx.state();
let config = state.config_options();
for optimizer in state.physical_optimizers() {
new_plan = optimizer
.optimize(new_plan, config)
.context(DataFusionSnafu)?;
}
Ok(Arc::new(PhysicalPlanAdapter::new(plan.schema(), new_plan)))
}
}
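Physical optimization now runs straight off the SessionState carried by QueryEngineContext instead of delegating to QueryEngineState: the rule set comes from state.physical_optimizers() and each rule is applied in turn with the session's ConfigOptions. The loop in isolation looks roughly like this, assuming the DataFusion plan has already been unwrapped from the PhysicalPlanAdapter:

    use std::sync::Arc;
    use datafusion::execution::context::SessionState;
    use datafusion::physical_plan::ExecutionPlan;

    // Sketch of the optimizer loop above.
    fn run_physical_optimizers(
        state: &SessionState,
        mut plan: Arc<dyn ExecutionPlan>,
    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
        let config = state.config_options();
        for optimizer in state.physical_optimizers() {
            plan = optimizer.optimize(plan, config)?;
        }
        Ok(plan)
    }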
@@ -308,14 +344,15 @@ mod tests {
QueryEngineFactory::new(catalog_list).query_engine()
}
#[test]
fn test_sql_to_plan() {
#[tokio::test]
async fn test_sql_to_plan() {
let engine = create_test_engine();
let sql = "select sum(number) from numbers limit 20";
let stmt = QueryLanguageParser::parse_sql(sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
// TODO(sunng87): do not rely on to_string for compare
@@ -336,6 +373,7 @@ mod tests {
let stmt = QueryLanguageParser::parse_sql(sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();
@@ -364,8 +402,8 @@ mod tests {
}
}
#[test]
fn test_describe() {
#[tokio::test]
async fn test_describe() {
let engine = create_test_engine();
let sql = "select sum(number) from numbers limit 20";
@@ -373,6 +411,7 @@ mod tests {
let schema = engine
.describe(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
assert_eq!(

View File

@@ -17,6 +17,7 @@
use std::any::Any;
use std::sync::Arc;
use async_trait::async_trait;
use catalog::error::{self as catalog_error, Error};
use catalog::{
CatalogListRef, CatalogProvider, CatalogProviderRef, SchemaProvider, SchemaProviderRef,
@@ -137,6 +138,7 @@ struct DfSchemaProviderAdapter {
schema_provider: Arc<dyn SchemaProvider>,
}
#[async_trait]
impl DfSchemaProvider for DfSchemaProviderAdapter {
fn as_any(&self) -> &dyn Any {
self
@@ -148,9 +150,10 @@ impl DfSchemaProvider for DfSchemaProviderAdapter {
.expect("datafusion does not accept fallible catalog access")
}
fn table(&self, name: &str) -> Option<Arc<dyn DfTableProvider>> {
async fn table(&self, name: &str) -> Option<Arc<dyn DfTableProvider>> {
self.schema_provider
.table(name)
.await
.expect("datafusion does not accept fallible catalog access")
.map(|table| Arc::new(DfTableProviderAdapter::new(table)) as _)
}
@@ -186,6 +189,7 @@ struct SchemaProviderAdapter {
df_schema_provider: Arc<dyn DfSchemaProvider>,
}
#[async_trait]
impl SchemaProvider for SchemaProviderAdapter {
fn as_any(&self) -> &dyn Any {
self
@@ -196,8 +200,9 @@ impl SchemaProvider for SchemaProviderAdapter {
Ok(self.df_schema_provider.table_names())
}
fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
let table = self.df_schema_provider.table(name).map(|table_provider| {
async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
let table = self.df_schema_provider.table(name).await;
let table = table.map(|table_provider| {
match table_provider
.as_any()
.downcast_ref::<DfTableProviderAdapter>()
@@ -282,8 +287,8 @@ mod tests {
.unwrap();
}
#[test]
pub fn test_register_table() {
#[tokio::test]
async fn test_register_table() {
let adapter = DfSchemaProviderAdapter {
schema_provider: Arc::new(MemorySchemaProvider::new()),
};
@@ -296,7 +301,7 @@ mod tests {
))),
)
.unwrap();
adapter.table("test_table").unwrap();
adapter.table("test_table").await.unwrap();
}
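The driver for these changes is that table lookup on both the DataFusion and GreptimeDB schema-provider traits is now async, so the two adapter impls pick up #[async_trait] and every lookup gains an .await. Sketched with the DfSchemaProvider / DfTableProvider aliases this module already uses:

    #[async_trait]
    impl DfSchemaProvider for DfSchemaProviderAdapter {
        // ...other methods unchanged...

        async fn table(&self, name: &str) -> Option<Arc<dyn DfTableProvider>> {
            self.schema_provider
                .table(name)
                .await
                .expect("datafusion does not accept fallible catalog access")
                .map(|table| Arc::new(DfTableProviderAdapter::new(table)) as _)
        }
    }

    // Call sites await the lookup, as in the test above:
    let maybe_table = adapter.table("test_table").await;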
#[test]

View File

@@ -31,13 +31,6 @@ pub enum InnerError {
#[snafu(display("PhysicalPlan downcast failed"))]
PhysicalPlanDowncast { backtrace: Backtrace },
#[snafu(display("Cannot plan SQL: {}, source: {}", sql, source))]
PlanSql {
sql: String,
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("Fail to convert arrow schema, source: {}", source))]
ConvertSchema {
#[snafu(backtrace)]
@@ -77,7 +70,6 @@ impl ErrorExt for InnerError {
PhysicalPlanDowncast { .. } | ConvertSchema { .. } | TableSchemaMismatch { .. } => {
StatusCode::Unexpected
}
PlanSql { .. } => StatusCode::PlanQuery,
ConvertDfRecordBatchStream { source } => source.status_code(),
ExecutePhysicalPlan { source } => source.status_code(),
}
@@ -114,12 +106,6 @@ mod tests {
.unwrap();
assert_error(&err, StatusCode::EngineExecuteQuery);
let err = throw_df_error()
.context(PlanSqlSnafu { sql: "" })
.err()
.unwrap();
assert_error(&err, StatusCode::PlanQuery);
let res: Result<(), InnerError> = PhysicalPlanDowncastSnafu {}.fail();
let err = res.err().unwrap();
assert_error(&err, StatusCode::Unexpected);

View File

@@ -12,127 +12,103 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::Arc;
use common_error::prelude::BoxedError;
use arrow_schema::DataType;
use catalog::table_source::DfTableSourceProvider;
use common_query::logical_plan::create_aggregate_function;
use datafusion::catalog::TableReference;
use datafusion::error::Result as DfResult;
use datafusion::execution::context::SessionState;
use datafusion::physical_plan::udaf::AggregateUDF;
use datafusion::physical_plan::udf::ScalarUDF;
use datafusion::sql::planner::{ContextProvider, PlannerContext, SqlToRel};
use datafusion_common::ScalarValue;
use datafusion::sql::planner::ContextProvider;
use datafusion_common::config::ConfigOptions;
use datafusion_common::{DataFusionError, OwnedTableReference};
use datafusion_expr::TableSource;
use datatypes::arrow::datatypes::DataType;
use datatypes::prelude::DataType as DataTypeTrait;
use datafusion_physical_expr::var_provider::{is_system_variables, VarType};
use datafusion_sql::parser::Statement as DfStatement;
use session::context::QueryContextRef;
use snafu::ResultExt;
use sql::statements::explain::Explain;
use sql::statements::query::Query;
use sql::statements::statement::Statement;
use crate::datafusion::error;
use crate::error::{QueryPlanSnafu, Result};
use crate::plan::LogicalPlan;
use crate::planner::Planner;
use crate::error::{CatalogSnafu, DataFusionSnafu, Result};
use crate::query_engine::QueryEngineState;
pub struct DfPlanner<'a, S: ContextProvider> {
sql_to_rel: SqlToRel<'a, S>,
}
impl<'a, S: ContextProvider + Send + Sync> DfPlanner<'a, S> {
/// Creates a DataFusion planner instance
pub fn new(schema_provider: &'a S) -> Self {
let rel = SqlToRel::new(schema_provider);
Self { sql_to_rel: rel }
}
/// Converts QUERY statement to logical plan.
pub fn query_to_plan(&self, query: Box<Query>) -> Result<LogicalPlan> {
// todo(hl): original SQL should be provided as an argument
let sql = query.inner.to_string();
let mut context = PlannerContext::new_with_prepare_param_data_types(
query
.param_types()
.iter()
.map(|v| v.as_arrow_type())
.collect(),
);
let result = self
.sql_to_rel
.query_to_plan(query.inner, &mut context)
.context(error::PlanSqlSnafu { sql })
.map_err(BoxedError::new)
.context(QueryPlanSnafu)?;
Ok(LogicalPlan::DfPlan(result))
}
/// Converts EXPLAIN statement to logical plan.
pub fn explain_to_plan(&self, explain: Explain) -> Result<LogicalPlan> {
let result = self
.sql_to_rel
.sql_statement_to_plan(explain.inner.clone())
.context(error::PlanSqlSnafu {
sql: explain.to_string(),
})
.map_err(BoxedError::new)
.context(QueryPlanSnafu)?;
Ok(LogicalPlan::DfPlan(result))
}
}
impl<'a, S> Planner for DfPlanner<'a, S>
where
S: ContextProvider + Send + Sync,
{
/// Converts statement to logical plan using datafusion planner
fn statement_to_plan(&self, statement: Statement) -> Result<LogicalPlan> {
match statement {
Statement::Query(qb) => self.query_to_plan(qb),
Statement::Explain(explain) => self.explain_to_plan(explain),
// The TQL has it's a dedicated planner
Statement::Tql(_tql) => unreachable!(),
Statement::ShowTables(_)
| Statement::Delete(_)
| Statement::ShowDatabases(_)
| Statement::ShowCreateTable(_)
| Statement::DescribeTable(_)
| Statement::CreateTable(_)
| Statement::CreateDatabase(_)
| Statement::Alter(_)
| Statement::Insert(_)
| Statement::DropTable(_)
| Statement::Use(_)
| Statement::Copy(_) => unreachable!(),
}
}
}
pub struct DfContextProviderAdapter {
state: QueryEngineState,
query_ctx: QueryContextRef,
engine_state: QueryEngineState,
session_state: SessionState,
tables: HashMap<String, Arc<dyn TableSource>>,
table_provider: DfTableSourceProvider,
}
impl DfContextProviderAdapter {
pub fn new(state: QueryEngineState, query_ctx: QueryContextRef) -> Self {
Self { state, query_ctx }
pub(crate) async fn try_new(
engine_state: QueryEngineState,
session_state: SessionState,
df_stmt: &DfStatement,
query_ctx: QueryContextRef,
) -> Result<Self> {
let table_names = session_state
.resolve_table_references(df_stmt)
.context(DataFusionSnafu)?;
let mut table_provider = DfTableSourceProvider::new(
engine_state.catalog_list().clone(),
engine_state.disallow_cross_schema_query(),
query_ctx.as_ref(),
);
let tables = resolve_tables(table_names, &mut table_provider).await?;
Ok(Self {
engine_state,
session_state,
tables,
table_provider,
})
}
}
async fn resolve_tables(
table_names: Vec<OwnedTableReference>,
table_provider: &mut DfTableSourceProvider,
) -> Result<HashMap<String, Arc<dyn TableSource>>> {
let mut tables = HashMap::with_capacity(table_names.len());
for table_name in table_names {
let resolved_name = table_provider
.resolve_table_ref(table_name.as_table_reference())
.context(CatalogSnafu)?;
if let Entry::Vacant(v) = tables.entry(resolved_name.to_string()) {
let table = table_provider
.resolve_table(table_name)
.await
.context(CatalogSnafu)?;
v.insert(table);
}
}
Ok(tables)
}
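The constraint that shapes DfContextProviderAdapter::try_new is that DataFusion's ContextProvider trait is synchronous, while table lookup through the catalog is now async. The adapter therefore resolves everything up front: SessionState::resolve_table_references extracts the table names from the statement, DfTableSourceProvider resolves each one (applying the cross-schema check), and the results are cached in a HashMap keyed by the fully resolved name, so get_table_provider below becomes a plain map lookup. The pre-resolution step, condensed (error conversions such as CatalogSnafu elided behind `?`):

    use std::collections::hash_map::Entry;
    use std::collections::HashMap;
    use std::sync::Arc;
    use datafusion_expr::TableSource;

    // `table_provider` is a mutable DfTableSourceProvider and `df_stmt` the DataFusion
    // statement being planned, as in try_new above.
    let table_names = session_state.resolve_table_references(&df_stmt)?;
    let mut tables: HashMap<String, Arc<dyn TableSource>> =
        HashMap::with_capacity(table_names.len());
    for table_name in table_names {
        let resolved = table_provider.resolve_table_ref(table_name.as_table_reference())?;
        if let Entry::Vacant(v) = tables.entry(resolved.to_string()) {
            let table = table_provider.resolve_table(table_name).await?;
            v.insert(table);
        }
    }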
impl ContextProvider for DfContextProviderAdapter {
fn get_table_provider(&self, name: TableReference) -> DfResult<Arc<dyn TableSource>> {
self.state.get_table_provider(self.query_ctx.clone(), name)
let table_ref = self.table_provider.resolve_table_ref(name)?;
self.tables
.get(&table_ref.to_string())
.cloned()
.ok_or_else(|| DataFusionError::Plan(format!("table '{}' not found", table_ref)))
}
fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
self.state.get_function_meta(name)
self.session_state.scalar_functions().get(name).cloned()
}
fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
self.state.aggregate_function(name).map(|func| {
self.engine_state.aggregate_function(name).map(|func| {
Arc::new(
create_aggregate_function(func.name(), func.args_count(), func.create()).into(),
)
@@ -140,10 +116,24 @@ impl ContextProvider for DfContextProviderAdapter {
}
fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType> {
self.state.get_variable_type(variable_names)
if variable_names.is_empty() {
return None;
}
let provider_type = if is_system_variables(variable_names) {
VarType::System
} else {
VarType::UserDefined
};
self.session_state
.execution_props()
.var_providers
.as_ref()
.and_then(|provider| provider.get(&provider_type)?.get_type(variable_names))
}
fn get_config_option(&self, variable: &str) -> Option<ScalarValue> {
self.state.get_config_option(variable)
fn options(&self) -> &ConfigOptions {
self.session_state.config_options()
}
}

View File

@@ -88,6 +88,25 @@ pub enum Error {
source: std::num::ParseFloatError,
backtrace: Backtrace,
},
#[snafu(display("DataFusion error: {}", source))]
DataFusion {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("General SQL error: {}", source))]
Sql {
#[snafu(backtrace)]
source: sql::error::Error,
},
#[snafu(display("Cannot plan SQL: {}, source: {}", sql, source))]
PlanSql {
sql: String,
source: DataFusionError,
backtrace: Backtrace,
},
}
impl ErrorExt for Error {
@@ -108,6 +127,9 @@ impl ErrorExt for Error {
CreateRecordBatch { source } => source.status_code(),
Datatype { source } => source.status_code(),
QueryExecution { source } | QueryPlan { source } => source.status_code(),
DataFusion { .. } => StatusCode::Internal,
Sql { source } => source.status_code(),
PlanSql { .. } => StatusCode::PlanQuery,
}
}

View File

@@ -45,8 +45,8 @@ impl OptimizerRule for TypeConversionRule {
match plan {
LogicalPlan::Filter(filter) => {
let rewritten = filter.predicate().clone().rewrite(&mut converter)?;
let Some(plan) = self.try_optimize(filter.input(), _config)? else { return Ok(None) };
let rewritten = filter.predicate.clone().rewrite(&mut converter)?;
let Some(plan) = self.try_optimize(&filter.input, _config)? else { return Ok(None) };
Ok(Some(LogicalPlan::Filter(Filter::try_new(
rewritten,
Arc::new(plan),
@@ -115,7 +115,10 @@ impl OptimizerRule for TypeConversionRule {
| LogicalPlan::CreateCatalogSchema { .. }
| LogicalPlan::CreateCatalog { .. }
| LogicalPlan::EmptyRelation(_)
| LogicalPlan::Prepare(_) => Ok(Some(plan.clone())),
| LogicalPlan::Prepare(_)
| LogicalPlan::Dml(_)
| LogicalPlan::DescribeTable(_)
| LogicalPlan::Unnest(_) => Ok(Some(plan.clone())),
}
}

View File

@@ -157,7 +157,7 @@ mod test {
distinct: false, \
top: None, \
projection: \
[Wildcard(WildcardAdditionalOptions { opt_exclude: None, opt_except: None })], \
[Wildcard(WildcardAdditionalOptions { opt_exclude: None, opt_except: None, opt_rename: None })], \
into: None, \
from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: \"t1\", quote_style: None }]\
), \
@@ -174,7 +174,7 @@ mod test {
sort_by: [], \
having: None, \
qualify: None \
}), order_by: [], limit: None, offset: None, fetch: None, lock: None }, param_types: [] }))");
}), order_by: [], limit: None, offset: None, fetch: None, locks: [] }, param_types: [] }))");
assert_eq!(format!("{stmt:?}"), expected);
}

View File

@@ -40,13 +40,13 @@ pub use crate::query_engine::state::QueryEngineState;
pub trait QueryEngine: Send + Sync {
fn name(&self) -> &str;
fn statement_to_plan(
async fn statement_to_plan(
&self,
stmt: QueryStatement,
query_ctx: QueryContextRef,
) -> Result<LogicalPlan>;
fn describe(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<Schema>;
async fn describe(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<Schema>;
async fn execute(&self, plan: &LogicalPlan) -> Result<Output>;

View File

@@ -12,21 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/// Query engine execution context
use crate::query_engine::state::QueryEngineState;
use datafusion::execution::context::SessionState;
#[derive(Debug)]
pub struct QueryEngineContext {
state: QueryEngineState,
state: SessionState,
}
impl QueryEngineContext {
pub fn new(state: QueryEngineState) -> Self {
pub fn new(state: SessionState) -> Self {
Self { state }
}
#[inline]
pub fn state(&self) -> &QueryEngineState {
pub fn state(&self) -> &SessionState {
&self.state
}
}
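QueryEngineContext is now a thin wrapper around DataFusion's SessionState, obtained from QueryEngineState::session_state(); the optimizer and planner entry points pull everything they need from it. Typical construction on the engine side:

    // As used in DatafusionQueryEngine above; `self.state` is the QueryEngineState.
    let ctx = QueryEngineContext::new(self.state.session_state());
    let state = ctx.state(); // &SessionState driving optimize / create_physical_plan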

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use datafusion_common::TableReference;
use session::context::QueryContextRef;
use snafu::ensure;
@@ -40,34 +39,6 @@ pub fn validate_catalog_and_schema(
Ok(())
}
pub fn validate_table_references(name: TableReference, query_ctx: &QueryContextRef) -> Result<()> {
match name {
TableReference::Bare { .. } => Ok(()),
TableReference::Partial { schema, .. } => {
ensure!(
schema == query_ctx.current_schema(),
QueryAccessDeniedSnafu {
catalog: query_ctx.current_catalog(),
schema: schema.to_string(),
}
);
Ok(())
}
TableReference::Full {
catalog, schema, ..
} => {
ensure!(
catalog == query_ctx.current_catalog() && schema == query_ctx.current_schema(),
QueryAccessDeniedSnafu {
catalog: catalog.to_string(),
schema: schema.to_string(),
}
);
Ok(())
}
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
@@ -76,47 +47,6 @@ mod tests {
use super::*;
#[test]
fn test_validate_table_ref() {
let context = Arc::new(QueryContext::with("greptime", "public"));
let table_ref = TableReference::Bare {
table: "table_name",
};
let re = validate_table_references(table_ref, &context);
assert!(re.is_ok());
let table_ref = TableReference::Partial {
schema: "public",
table: "table_name",
};
let re = validate_table_references(table_ref, &context);
assert!(re.is_ok());
let table_ref = TableReference::Partial {
schema: "wrong_schema",
table: "table_name",
};
let re = validate_table_references(table_ref, &context);
assert!(re.is_err());
let table_ref = TableReference::Full {
catalog: "greptime",
schema: "public",
table: "table_name",
};
let re = validate_table_references(table_ref, &context);
assert!(re.is_ok());
let table_ref = TableReference::Full {
catalog: "wrong_catalog",
schema: "public",
table: "table_name",
};
let re = validate_table_references(table_ref, &context);
assert!(re.is_err());
}
#[test]
fn test_validate_catalog_and_schema() {
let context = Arc::new(QueryContext::with("greptime", "public"));

View File

@@ -19,28 +19,21 @@ use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use catalog::CatalogListRef;
use common_base::Plugins;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_function::scalars::aggregate::AggregateFunctionMetaRef;
use common_query::physical_plan::{SessionContext, TaskContext};
use common_query::physical_plan::SessionContext;
use common_query::prelude::ScalarUdf;
use datafusion::catalog::TableReference;
use datafusion::error::Result as DfResult;
use datafusion::execution::context::{QueryPlanner, SessionConfig, SessionState};
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::physical_plan::planner::DefaultPhysicalPlanner;
use datafusion::physical_plan::udf::ScalarUDF;
use datafusion::physical_plan::{ExecutionPlan, PhysicalPlanner};
use datafusion_common::ScalarValue;
use datafusion_expr::{LogicalPlan as DfLogicalPlan, TableSource};
use datafusion_expr::LogicalPlan as DfLogicalPlan;
use datafusion_optimizer::optimizer::Optimizer;
use datafusion_sql::planner::ContextProvider;
use datatypes::arrow::datatypes::DataType;
use promql::extension_plan::PromExtensionPlanner;
use session::context::QueryContextRef;
use crate::datafusion::DfCatalogListAdapter;
use crate::optimizer::TypeConversionRule;
use crate::query_engine::options::{validate_table_references, QueryOptions};
use crate::query_engine::options::QueryOptions;
/// Query engine global state
// TODO(yingwen): This QueryEngineState still relies on datafusion, maybe we can define a trait for it,
@@ -64,16 +57,18 @@ impl fmt::Debug for QueryEngineState {
impl QueryEngineState {
pub fn new(catalog_list: CatalogListRef, plugins: Arc<Plugins>) -> Self {
let runtime_env = Arc::new(RuntimeEnv::default());
let session_config = SessionConfig::new()
.with_default_catalog_and_schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
let session_config = SessionConfig::new().with_create_default_catalog_and_schema(false);
let mut optimizer = Optimizer::new();
// Apply the type conversion rule first.
optimizer.rules.insert(0, Arc::new(TypeConversionRule {}));
let mut session_state = SessionState::with_config_rt(session_config, runtime_env);
session_state.optimizer = optimizer;
session_state.catalog_list = Arc::new(DfCatalogListAdapter::new(catalog_list.clone()));
session_state.query_planner = Arc::new(DfQueryPlanner::new());
let session_state = SessionState::with_config_rt_and_catalog_list(
session_config,
runtime_env,
Arc::new(DfCatalogListAdapter::new(catalog_list.clone())),
)
.with_optimizer_rules(optimizer.rules)
.with_query_planner(Arc::new(DfQueryPlanner::new()));
let df_context = SessionContext::with_state(session_state);
@@ -113,69 +108,15 @@ impl QueryEngineState {
&self.catalog_list
}
#[inline]
pub(crate) fn task_ctx(&self) -> Arc<TaskContext> {
self.df_context.task_ctx()
pub(crate) fn disallow_cross_schema_query(&self) -> bool {
self.plugins
.get::<QueryOptions>()
.map(|x| x.disallow_cross_schema_query)
.unwrap_or(false)
}
pub(crate) fn get_table_provider(
&self,
query_ctx: QueryContextRef,
name: TableReference,
) -> DfResult<Arc<dyn TableSource>> {
let state = self.df_context.state();
if let Some(opts) = self.plugins.get::<QueryOptions>() {
if opts.disallow_cross_schema_query {
validate_table_references(name, &query_ctx)?;
}
}
if let TableReference::Bare { table } = name {
let name = TableReference::Partial {
schema: &query_ctx.current_schema(),
table,
};
state.get_table_provider(name)
} else {
state.get_table_provider(name)
}
}
pub(crate) fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
self.df_context.state().get_function_meta(name)
}
pub(crate) fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType> {
self.df_context.state().get_variable_type(variable_names)
}
pub(crate) fn get_config_option(&self, variable: &str) -> Option<ScalarValue> {
self.df_context.state().get_config_option(variable)
}
pub(crate) fn optimize(&self, plan: &DfLogicalPlan) -> DfResult<DfLogicalPlan> {
self.df_context.optimize(plan)
}
pub(crate) async fn create_physical_plan(
&self,
logical_plan: &DfLogicalPlan,
) -> DfResult<Arc<dyn ExecutionPlan>> {
self.df_context.create_physical_plan(logical_plan).await
}
pub(crate) fn optimize_physical_plan(
&self,
mut plan: Arc<dyn ExecutionPlan>,
) -> DfResult<Arc<dyn ExecutionPlan>> {
let state = self.df_context.state();
let config = &state.config;
for optimizer in &state.physical_optimizers {
plan = optimizer.optimize(plan, config)?;
}
Ok(plan)
pub(crate) fn session_state(&self) -> SessionState {
self.df_context.state()
}
}
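QueryEngineState no longer reaches into SessionState fields directly; the catalog list, optimizer rules, and query planner are supplied through the constructor and builder methods newer DataFusion exposes, and everything else (table sources, UDFs, variables, physical optimizers) is read back out of the state on demand. The construction, condensed from new() above:

    use std::sync::Arc;
    use datafusion::execution::context::{SessionConfig, SessionState};
    use datafusion::execution::runtime_env::RuntimeEnv;
    use datafusion_optimizer::optimizer::Optimizer;

    // `catalog_list` is the CatalogListRef passed into QueryEngineState::new;
    // DfCatalogListAdapter, TypeConversionRule and DfQueryPlanner are this module's types,
    // and SessionContext is the re-export from common_query::physical_plan.
    let runtime_env = Arc::new(RuntimeEnv::default());
    let session_config = SessionConfig::new().with_create_default_catalog_and_schema(false);

    let mut optimizer = Optimizer::new();
    // Run the type conversion rule before the stock logical rules.
    optimizer.rules.insert(0, Arc::new(TypeConversionRule {}));

    let session_state = SessionState::with_config_rt_and_catalog_list(
        session_config,
        runtime_env,
        Arc::new(DfCatalogListAdapter::new(catalog_list.clone())),
    )
    .with_optimizer_rules(optimizer.rules)
    .with_query_planner(Arc::new(DfQueryPlanner::new()));

    let df_context = SessionContext::with_state(session_state);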

View File

@@ -162,7 +162,8 @@ pub async fn explain(
query_ctx: QueryContextRef,
) -> Result<Output> {
let plan = query_engine
.statement_to_plan(QueryStatement::Sql(Statement::Explain(*stmt)), query_ctx)?;
.statement_to_plan(QueryStatement::Sql(Statement::Explain(*stmt)), query_ctx)
.await?;
query_engine.execute(&plan).await
}

View File

@@ -87,6 +87,7 @@ async fn execute_argmax<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -87,6 +87,7 @@ async fn execute_argmin<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -84,6 +84,7 @@ where
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -83,6 +83,7 @@ async fn execute_mean<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -177,7 +177,7 @@ async fn test_my_sum() -> Result<()> {
r#"+--------+
| my_sum |
+--------+
| 9 |
| 9.0 |
+--------+"#,
)
.await?;
@@ -223,6 +223,7 @@ where
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await?;

View File

@@ -58,6 +58,7 @@ async fn test_percentile_correctness() -> Result<()> {
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();
@@ -103,6 +104,7 @@ async fn execute_percentile<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -84,6 +84,7 @@ async fn execute_polyval<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -121,8 +121,8 @@ fn catalog_list() -> Result<Arc<MemoryCatalogManager>> {
Ok(catalog_list)
}
#[test]
fn test_query_validate() -> Result<()> {
#[tokio::test]
async fn test_query_validate() -> Result<()> {
common_telemetry::init_default_ut_logging();
let catalog_list = catalog_list()?;
@@ -137,13 +137,16 @@ fn test_query_validate() -> Result<()> {
let engine = factory.query_engine();
let stmt = QueryLanguageParser::parse_sql("select number from public.numbers").unwrap();
let re = engine.statement_to_plan(stmt, Arc::new(QueryContext::new()));
assert!(re.is_ok());
assert!(engine
.statement_to_plan(stmt, QueryContext::arc())
.await
.is_ok());
let stmt = QueryLanguageParser::parse_sql("select number from wrongschema.numbers").unwrap();
let re = engine.statement_to_plan(stmt, Arc::new(QueryContext::new()));
assert!(re.is_err());
assert!(engine
.statement_to_plan(stmt, QueryContext::arc())
.await
.is_err());
Ok(())
}
@@ -176,6 +179,7 @@ async fn test_udf() -> Result<()> {
.unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await?;

View File

@@ -83,6 +83,7 @@ async fn execute_scipy_stats_norm_cdf<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -83,6 +83,7 @@ async fn execute_scipy_stats_norm_pdf<'a>(
let stmt = QueryLanguageParser::parse_sql(&sql).unwrap();
let plan = engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let output = engine.execute(&plan).await.unwrap();

View File

@@ -135,6 +135,7 @@ impl TimeRangeTester {
&self
.engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap(),
)
.await

View File

@@ -376,14 +376,16 @@ impl PyQueryEngine {
let thread_handle = std::thread::spawn(move || -> std::result::Result<_, String> {
if let Some(engine) = query {
let stmt = QueryLanguageParser::parse_sql(s.as_str()).map_err(|e| e.to_string())?;
let plan = engine
.statement_to_plan(stmt, Default::default())
.map_err(|e| e.to_string())?;
// To prevent the error of nested creating Runtime, if is nested, use the parent runtime instead
let rt = tokio::runtime::Runtime::new().map_err(|e| e.to_string())?;
let handle = rt.handle().clone();
let res = handle.block_on(async {
let plan = engine
.statement_to_plan(stmt, Default::default())
.await
.map_err(|e| e.to_string())?;
let res = engine
.clone()
.execute(&plan)

View File

@@ -241,7 +241,8 @@ impl Script for PyScript {
);
let plan = self
.query_engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))?;
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await?;
let res = self.query_engine.execute(&plan).await?;
let copr = self.copr.clone();
match res {

View File

@@ -126,6 +126,7 @@ impl ScriptsTable {
DEFAULT_SCHEMA_NAME,
SCRIPTS_TABLE_NAME,
)
.await
.context(FindScriptsTableSnafu)?
.context(ScriptsTableNotFoundSnafu)?;
@@ -160,6 +161,7 @@ impl ScriptsTable {
let plan = self
.query_engine
.statement_to_plan(stmt, Arc::new(QueryContext::new()))
.await
.unwrap();
let stream = match self

View File

@@ -103,7 +103,7 @@ impl FlightService for FlightHandler {
async fn do_get(&self, request: Request<Ticket>) -> TonicResult<Response<Self::DoGetStream>> {
let ticket = request.into_inner().ticket;
let request =
GreptimeRequest::decode(ticket.as_slice()).context(error::InvalidFlightTicketSnafu)?;
GreptimeRequest::decode(ticket.as_ref()).context(error::InvalidFlightTicketSnafu)?;
let query = request.request.context(error::InvalidQuerySnafu {
reason: "Expecting non-empty GreptimeRequest.",

View File

@@ -604,7 +604,7 @@ mod test {
unimplemented!()
}
fn do_describe(
async fn do_describe(
&self,
_stmt: sql::statements::statement::Statement,
_query_ctx: QueryContextRef,

View File

@@ -441,6 +441,7 @@ impl ExtendedQueryHandler for PostgresServerHandler {
if let Some(schema) = self
.query_handler
.do_describe(stmt.clone(), self.query_ctx.clone())
.await
.map_err(|e| PgWireError::ApiError(Box::new(e)))?
{
schema_to_pg(&schema, FieldFormat::Binary)

View File

@@ -50,7 +50,7 @@ pub trait SqlQueryHandler {
) -> std::result::Result<Output, Self::Error>;
// TODO(LFC): revisit this for mysql prepared statement
fn do_describe(
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
@@ -122,9 +122,14 @@ where
.context(error::ExecuteStatementSnafu)
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
self.0
.do_describe(stmt, query_ctx)
.await
.map_err(BoxedError::new)
.context(error::DescribeStatementSnafu)
}

View File

@@ -72,7 +72,7 @@ impl SqlQueryHandler for DummyInstance {
unimplemented!()
}
fn do_describe(
async fn do_describe(
&self,
_stmt: sql::statements::statement::Statement,
_query_ctx: QueryContextRef,

View File

@@ -70,7 +70,7 @@ impl SqlQueryHandler for DummyInstance {
unimplemented!()
}
fn do_describe(
async fn do_describe(
&self,
_stmt: sql::statements::statement::Statement,
_query_ctx: QueryContextRef,

View File

@@ -95,7 +95,7 @@ impl SqlQueryHandler for DummyInstance {
unimplemented!()
}
fn do_describe(
async fn do_describe(
&self,
_stmt: sql::statements::statement::Statement,
_query_ctx: QueryContextRef,

View File

@@ -72,6 +72,7 @@ impl SqlQueryHandler for DummyInstance {
let plan = self
.query_engine
.statement_to_plan(stmt, query_ctx)
.await
.unwrap();
let output = self.query_engine.execute(&plan).await.unwrap();
vec![Ok(output)]
@@ -93,11 +94,16 @@ impl SqlQueryHandler for DummyInstance {
unimplemented!()
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
if let Statement::Query(_) = stmt {
let schema = self
.query_engine
.describe(QueryStatement::Sql(stmt), query_ctx)
.await
.unwrap();
Ok(Some(schema))
} else {

View File

@@ -11,6 +11,7 @@ common-base = { path = "../common/base" }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datafusion-sql.workspace = true
datatypes = { path = "../datatypes" }
hex = "0.4"
itertools = "0.10"

View File

@@ -138,6 +138,12 @@ pub enum Error {
#[snafu(display("Unsupported format option: {}", name))]
UnsupportedCopyFormatOption { name: String },
#[snafu(display("Unable to convert statement {} to DataFusion statement", statement))]
ConvertToDfStatement {
statement: String,
backtrace: Backtrace,
},
}
impl ErrorExt for Error {
@@ -167,6 +173,7 @@ impl ErrorExt for Error {
UnsupportedAlterTableStatement { .. } => StatusCode::InvalidSyntax,
SerializeColumnDefaultConstraint { source, .. } => source.status_code(),
ConvertToGrpcDataType { source, .. } => source.status_code(),
ConvertToDfStatement { .. } => StatusCode::Internal,
}
}

View File

@@ -16,11 +16,9 @@ use snafu::{ensure, ResultExt};
use sqlparser::dialect::Dialect;
use sqlparser::keywords::Keyword;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::{Token, Tokenizer};
use sqlparser::tokenizer::{Token, TokenWithLocation};
use crate::error::{
self, InvalidDatabaseNameSnafu, InvalidTableNameSnafu, Result, SyntaxSnafu, TokenizerSnafu,
};
use crate::error::{self, InvalidDatabaseNameSnafu, InvalidTableNameSnafu, Result, SyntaxSnafu};
use crate::parsers::tql_parser;
use crate::statements::describe::DescribeTable;
use crate::statements::drop::DropTable;
@@ -38,14 +36,11 @@ impl<'a> ParserContext<'a> {
/// Parses SQL with given dialect
pub fn create_with_dialect(sql: &'a str, dialect: &dyn Dialect) -> Result<Vec<Statement>> {
let mut stmts: Vec<Statement> = Vec::new();
let mut tokenizer = Tokenizer::new(dialect, sql);
let tokens: Vec<Token> = tokenizer.tokenize().context(TokenizerSnafu { sql })?;
let mut parser_ctx = ParserContext {
sql,
parser: Parser::new(tokens, dialect),
};
let parser = Parser::new(dialect)
.try_with_sql(sql)
.context(SyntaxSnafu { sql })?;
let mut parser_ctx = ParserContext { sql, parser };
let mut expecting_statement_delimiter = false;
loop {
@@ -71,7 +66,7 @@ impl<'a> ParserContext<'a> {
/// Parses parser context to a set of statements.
pub fn parse_statement(&mut self) -> Result<Statement> {
match self.parser.peek_token() {
match self.parser.peek_token().token {
Token::Word(w) => {
match w.keyword {
Keyword::CREATE => {
@@ -185,7 +180,7 @@ impl<'a> ParserContext<'a> {
}
fn parse_show_tables(&mut self) -> Result<Statement> {
let database = match self.parser.peek_token() {
let database = match self.parser.peek_token().token {
Token::EOF | Token::SemiColon => {
return Ok(Statement::ShowTables(ShowTables {
kind: ShowKind::All,
@@ -220,7 +215,7 @@ impl<'a> ParserContext<'a> {
_ => None,
};
let kind = match self.parser.peek_token() {
let kind = match self.parser.peek_token().token {
Token::EOF | Token::SemiColon => ShowKind::All,
// SHOW TABLES [WHERE | LIKE] [EXPR]
Token::Word(w) => match w.keyword {
@@ -319,7 +314,7 @@ impl<'a> ParserContext<'a> {
}
// Report unexpected token
pub(crate) fn expected<T>(&self, expected: &str, found: Token) -> Result<T> {
pub(crate) fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T> {
Err(ParserError::ParserError(format!(
"Expected {expected}, found: {found}",
)))
@@ -327,7 +322,7 @@ impl<'a> ParserContext<'a> {
}
pub fn matches_keyword(&mut self, expected: Keyword) -> bool {
match self.parser.peek_token() {
match self.parser.peek_token().token {
Token::Word(w) => w.keyword == expected,
_ => false,
}
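Two sqlparser API changes drive the edits in this file: Parser is now constructed with Parser::new(dialect).try_with_sql(sql), which tokenizes internally (so the explicit Tokenizer and the TokenizerSnafu path go away), and peek_token()/next_token() return a TokenWithLocation, so call sites read its .token field. The pattern, assuming a &dyn Dialect and the SQL text:

    use sqlparser::parser::Parser;
    use sqlparser::tokenizer::Token;

    // Tokenization happens inside try_with_sql; failures surface as ParserError.
    let mut parser = Parser::new(dialect).try_with_sql(sql)?;

    // Dispatch on the inner Token, not on the TokenWithLocation wrapper.
    match parser.peek_token().token {
        Token::Word(w) => { /* branch on w.keyword as before */ }
        Token::EOF | Token::SemiColon => { /* end of statement */ }
        _ => { /* ... */ }
    }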
@@ -349,7 +344,7 @@ impl<'a> ParserContext<'a> {
/// Parses `SHOW DATABASES` statement.
pub fn parse_show_databases(&mut self) -> Result<Statement> {
let tok = self.parser.next_token();
let tok = self.parser.next_token().token;
match &tok {
Token::EOF | Token::SemiColon => {
Ok(Statement::ShowDatabases(ShowDatabases::new(ShowKind::All)))
@@ -563,7 +558,7 @@ mod tests {
limit: None,
offset: None,
fetch: None,
lock: None,
locks: vec![],
}));
let explain = Explain::try_from(SpStatement::Explain {

View File

@@ -22,7 +22,7 @@ use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Value};
use sqlparser::dialect::keywords::Keyword;
use sqlparser::parser::IsOptional::Mandatory;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::{Token, Word};
use sqlparser::tokenizer::{Token, TokenWithLocation, Word};
use crate::ast::{ColumnDef, Ident, TableConstraint, Value as SqlValue};
use crate::error::{
@@ -45,7 +45,7 @@ static THAN: Lazy<Token> = Lazy::new(|| Token::make_keyword("THAN"));
/// Parses create [table] statement
impl<'a> ParserContext<'a> {
pub(crate) fn parse_create(&mut self) -> Result<Statement> {
match self.parser.peek_token() {
match self.parser.peek_token().token {
Token::Word(w) => match w.keyword {
Keyword::TABLE => self.parse_create_table(),
@@ -135,7 +135,7 @@ impl<'a> ParserContext<'a> {
let column_list = self
.parser
.parse_parenthesized_column_list(Mandatory)
.parse_parenthesized_column_list(Mandatory, false)
.context(error::SyntaxSnafu { sql: self.sql })?;
let entries = self.parse_comma_separated(Self::parse_partition_entry)?;
@@ -172,7 +172,7 @@ impl<'a> ParserContext<'a> {
}
fn parse_value_list(&mut self) -> Result<SqlValue> {
let token = self.parser.peek_token();
let token = self.parser.peek_token().token;
let value = match token {
Token::Word(Word { value, .. }) if value == MAXVALUE => {
let _ = self.parser.next_token();
@@ -228,7 +228,7 @@ impl<'a> ParserContext<'a> {
loop {
if let Some(constraint) = self.parse_optional_table_constraint()? {
constraints.push(constraint);
} else if let Token::Word(_) = self.parser.peek_token() {
} else if let Token::Word(_) = self.parser.peek_token().token {
self.parse_column(&mut columns, &mut constraints)?;
} else {
return self.expected(
@@ -387,7 +387,10 @@ impl<'a> ParserContext<'a> {
Ok(Some(ColumnOption::NotNull))
} else if parser.parse_keywords(&[Keyword::COMMENT]) {
match parser.next_token() {
Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))),
TokenWithLocation {
token: Token::SingleQuotedString(value, ..),
..
} => Ok(Some(ColumnOption::Comment(value))),
unexpected => parser.expected("string", unexpected),
}
} else if parser.parse_keyword(Keyword::NULL) {
@@ -428,7 +431,10 @@ impl<'a> ParserContext<'a> {
None
};
match self.parser.next_token() {
Token::Word(w) if w.keyword == Keyword::PRIMARY => {
TokenWithLocation {
token: Token::Word(w),
..
} if w.keyword == Keyword::PRIMARY => {
self.parser
.expect_keyword(Keyword::KEY)
.context(error::UnexpectedSnafu {
@@ -438,7 +444,7 @@ impl<'a> ParserContext<'a> {
})?;
let columns = self
.parser
.parse_parenthesized_column_list(Mandatory)
.parse_parenthesized_column_list(Mandatory, false)
.context(error::SyntaxSnafu { sql: self.sql })?;
Ok(Some(TableConstraint::Unique {
name,
@@ -446,7 +452,10 @@ impl<'a> ParserContext<'a> {
is_primary: true,
}))
}
Token::Word(w) if w.keyword == Keyword::TIME => {
TokenWithLocation {
token: Token::Word(w),
..
} if w.keyword == Keyword::TIME => {
self.parser
.expect_keyword(Keyword::INDEX)
.context(error::UnexpectedSnafu {
@@ -457,7 +466,7 @@ impl<'a> ParserContext<'a> {
let columns = self
.parser
.parse_parenthesized_column_list(Mandatory)
.parse_parenthesized_column_list(Mandatory, false)
.context(error::SyntaxSnafu { sql: self.sql })?;
ensure!(
@@ -503,9 +512,11 @@ impl<'a> ParserContext<'a> {
actual: self.peek_token_as_string(),
})?;
match self.parser.next_token() {
Token::Word(w) => Ok(w.value),
unexpected => self.expected("Engine is missing", unexpected),
let token = self.parser.next_token();
if let Token::Word(w) = token.token {
Ok(w.value)
} else {
self.expected("'Engine' is missing", token)
}
}
}
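The same TokenWithLocation change shows up on the DDL side: arms that used to match Token::Word(w) now destructure the wrapper, and parse_parenthesized_column_list gains a second boolean argument (passed as false here). Inside a ParserContext method the pattern looks roughly like this (the error message is illustrative only):

    use sqlparser::dialect::keywords::Keyword;
    use sqlparser::tokenizer::{Token, TokenWithLocation};

    // Sketch of the constraint-parsing arms above; `self` is the ParserContext.
    match self.parser.next_token() {
        TokenWithLocation {
            token: Token::Word(w),
            ..
        } if w.keyword == Keyword::PRIMARY => {
            // PRIMARY KEY (...) handling as above
        }
        unexpected => return self.expected("PRIMARY or TIME", unexpected),
    }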

View File

@@ -35,7 +35,7 @@ impl<'a> ParserContext<'a> {
pub(crate) fn parse_tql(&mut self) -> Result<Statement> {
self.parser.next_token();
match self.parser.peek_token() {
match self.parser.peek_token().token {
Token::Word(w) => {
let uppercase = w.value.to_uppercase();
match w.keyword {
@@ -80,7 +80,7 @@ impl<'a> ParserContext<'a> {
parser: &mut Parser,
token: Token,
) -> std::result::Result<String, ParserError> {
let value = match parser.next_token() {
let value = match parser.next_token().token {
Token::Number(n, _) => n,
Token::DoubleQuotedString(s) | Token::SingleQuotedString(s) => s,
unexpected => {

View File

@@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use datafusion_sql::parser::Statement as DfStatement;
use sqlparser::ast::Statement as SpStatement;
use crate::error::{ConvertToDfStatementSnafu, Error};
use crate::statements::alter::AlterTable;
use crate::statements::copy::CopyTable;
use crate::statements::create::{CreateDatabase, CreateTable};
@@ -67,3 +71,21 @@ pub struct Hint {
pub comment: String,
pub prefix: String,
}
impl TryFrom<&Statement> for DfStatement {
type Error = Error;
fn try_from(s: &Statement) -> Result<Self, Self::Error> {
let s = match s {
Statement::Query(query) => SpStatement::Query(Box::new(query.inner.clone())),
Statement::Explain(explain) => explain.inner.clone(),
_ => {
return ConvertToDfStatementSnafu {
statement: format!("{s:?}"),
}
.fail();
}
};
Ok(DfStatement::Statement(Box::new(s)))
}
}
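This TryFrom impl is what lets the query engine hand statements to DataFusion's SQL planner: Query and Explain become DfStatement::Statement, everything else fails with ConvertToDfStatement. On the engine side (as in plan_sql_stmt) the conversion is just:

    use datafusion_sql::parser::Statement as DfStatement;

    // `stmt` is a sql::statements::statement::Statement; only Query and Explain convert.
    let df_stmt: DfStatement = (&stmt).try_into()?;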

View File

@@ -237,6 +237,7 @@ impl CreateTableProcedure {
})?;
let table_exists = schema
.table(&self.data.request.table_name)
.await
.map_err(Error::external)?
.is_some();
if table_exists {

View File

@@ -162,10 +162,6 @@ impl<'a> TimeRangePredicateBuilder<'a> {
| Operator::Multiply
| Operator::Divide
| Operator::Modulo
| Operator::Like
| Operator::NotLike
| Operator::ILike
| Operator::NotILike
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::RegexMatch

View File

@@ -211,7 +211,7 @@ async fn insert_and_assert(db: &Database) {
| host2 | | 0.2 | 1970-01-01T00:00:00.101 |
| host3 | 0.41 | | 1970-01-01T00:00:00.102 |
| host4 | 0.2 | 0.3 | 1970-01-01T00:00:00.103 |
| host5 | 66.6 | 1024 | 2022-12-28T04:17:07 |
| host5 | 66.6 | 1024.0 | 2022-12-28T04:17:07 |
| host6 | 88.8 | 333.3 | 2022-12-28T04:17:08 |
+-------+------+--------+-------------------------+\
";

View File

@@ -196,7 +196,7 @@ pub async fn test_sql_api(store_type: StorageType) {
let body = serde_json::from_str::<JsonResponse>(&res.text().await).unwrap();
assert!(!body.success());
assert!(body.execution_time_ms().is_some());
assert!(body.error().unwrap().contains("not found"));
assert!(body.error().unwrap().contains("not exist"));
// test database given
let res = client

View File

@@ -24,11 +24,11 @@ Affected Rows: 4
SELECT sum(n) from doubles;
+------------------+
| SUM(doubles.n) |
+------------------+
| 9007199254740992 |
+------------------+
+----------------------+
| SUM(doubles.n) |
+----------------------+
| 9.007199254740992e15 |
+----------------------+
DROP TABLE bigints;

View File

@@ -101,7 +101,7 @@ Error: 1001(Unsupported), SQL statement is not supported: DROP SCHEMA test_publ
SELECT * FROM test_public_schema.hello;
Error: 3000(PlanQuery), Error during planning: table 'greptime.test_public_schema.hello' not found
Error: 4001(TableNotFound), Table `greptime.test_public_schema.hello` not exist
USE public;

View File

@@ -64,11 +64,11 @@ Affected Rows: 4
SELECT sum(n) from doubles;
+------------------+
| SUM(doubles.n) |
+------------------+
| 9007199254740992 |
+------------------+
+----------------------+
| SUM(doubles.n) |
+----------------------+
| 9.007199254740992e15 |
+----------------------+
DROP TABLE bigints;

Some files were not shown because too many files have changed in this diff.