diff --git a/Cargo.lock b/Cargo.lock index d4f44c1d83..596712e6e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -286,11 +286,10 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fab9e93ba8ce88a37d5a30dce4b9913b75413dc1ac56cb5d72e5a840543f829" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" dependencies = [ - "ahash 0.8.6", "arrow-arith", "arrow-array", "arrow-buffer", @@ -309,9 +308,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" dependencies = [ "arrow-array", "arrow-buffer", @@ -324,9 +323,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" dependencies = [ "ahash 0.8.6", "arrow-buffer", @@ -341,9 +340,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" dependencies = [ "bytes", "half 2.3.1", @@ -352,27 +351,30 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "atoi 2.0.0", + "base64 0.22.0", "chrono", "comfy-table", "half 2.3.1", "lexical-core", "num", + "ryu", ] [[package]] name = "arrow-csv" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ef855dc6b126dc197f43e061d4de46b9d4c033aa51c2587657f7508242cef1" +checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" dependencies = [ "arrow-array", "arrow-buffer", @@ -389,9 +391,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -401,29 +403,30 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd938ea4a0e8d0db2b9f47ebba792f73f6188f4289707caeaf93a3be705e5ed5" +checksum = "3241ce691192d789b7b94f56a10e166ee608bdc3932c759eb0b85f09235352bb" dependencies = [ "arrow-array", "arrow-buffer", "arrow-cast", "arrow-ipc", "arrow-schema", - "base64 0.21.5", + "base64 0.22.0", "bytes", "futures", "paste", "prost 0.12.3", + "prost-types 0.12.3", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", ] 
[[package]] name = "arrow-ipc" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" dependencies = [ "arrow-array", "arrow-buffer", @@ -431,14 +434,14 @@ dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", - "lz4", + "lz4_flex 0.11.2", ] [[package]] name = "arrow-json" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03d7e3b04dd688ccec354fe449aed56b831679f03e44ee2c1cfc4045067b69c" +checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" dependencies = [ "arrow-array", "arrow-buffer", @@ -447,7 +450,7 @@ dependencies = [ "arrow-schema", "chrono", "half 2.3.1", - "indexmap 2.1.0", + "indexmap 2.2.6", "lexical-core", "num", "serde", @@ -456,9 +459,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" dependencies = [ "arrow-array", "arrow-buffer", @@ -471,9 +474,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" dependencies = [ "ahash 0.8.6", "arrow-array", @@ -486,9 +489,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" dependencies = [ "bitflags 2.4.1", "serde", @@ -496,9 +499,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" dependencies = [ "ahash 0.8.6", "arrow-array", @@ -510,18 +513,19 @@ dependencies = [ [[package]] name = "arrow-string" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cebbb282d6b9244895f4a9a912e55e57bce112554c7fa91fcec5459cb421ab" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "memchr", "num", "regex", - "regex-syntax 0.7.5", + "regex-syntax 0.8.2", ] [[package]] @@ -604,7 +608,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -626,7 +630,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -637,7 +641,7 @@ checksum = "fdf6721fb0140e4f897002dd086c06f6c27775df19cfe1fccb21181a48fd2c98" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -662,6 
+666,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic" version = "0.5.3" @@ -787,10 +800,10 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdca6a10ecad987bda04e95606ef85a5417dcaac1a78455242d72e031e2b6b62" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -927,7 +940,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -1029,7 +1042,7 @@ dependencies = [ "proc-macro-crate 2.0.0", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", "syn_derive", ] @@ -1352,9 +1365,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.33" +version = "0.4.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" +checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" dependencies = [ "android-tzdata", "iana-time-zone", @@ -1496,10 +1509,10 @@ version = "4.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -1551,7 +1564,7 @@ dependencies = [ "substrait 0.7.2", "tokio", "tokio-stream", - "tonic 0.10.2", + "tonic 0.11.0", "tracing", "tracing-subscriber", ] @@ -1832,7 +1845,7 @@ dependencies = [ "rand", "snafu", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", "tower", ] @@ -1865,7 +1878,7 @@ dependencies = [ "snafu", "static_assertions", "syn 1.0.109", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -1925,7 +1938,7 @@ dependencies = [ "strum 0.25.0", "table", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", "typetag", "uuid", ] @@ -1984,8 +1997,8 @@ dependencies = [ "datatypes", "serde", "snafu", - "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", - "sqlparser_derive 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", + "sqlparser_derive 0.1.1", "statrs", "tokio", ] @@ -1997,6 +2010,7 @@ dependencies = [ "arc-swap", "common-error", "common-macro", + "common-telemetry", "datafusion", "datafusion-common", "datatypes", @@ -2039,7 +2053,7 @@ dependencies = [ "greptime-proto", "lazy_static", "once_cell", - "opentelemetry 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "opentelemetry 0.21.0", "opentelemetry-otlp", "opentelemetry-semantic-conventions", "opentelemetry_sdk 0.21.1", @@ -2286,9 +2300,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if 1.0.0", ] @@ -2488,7 +2502,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -2510,7 +2524,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" 
dependencies = [ "darling_core 0.20.3", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -2534,12 +2548,13 @@ checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" [[package]] name = "datafusion" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "ahash 0.8.6", "arrow", "arrow-array", + "arrow-ipc", "arrow-schema", "async-compression 0.4.5", "async-trait", @@ -2548,8 +2563,12 @@ dependencies = [ "chrono", "dashmap", "datafusion-common", + "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-array", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-plan", @@ -2559,17 +2578,16 @@ dependencies = [ "glob", "half 2.3.1", "hashbrown 0.14.3", - "indexmap 2.1.0", - "itertools 0.11.0", + "indexmap 2.2.6", + "itertools 0.12.0", "log", "num_cpus", "object_store", "parking_lot 0.12.1", "parquet", - "percent-encoding", "pin-project-lite", "rand", - "sqlparser 0.38.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile", "tokio", "tokio-util", @@ -2581,8 +2599,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "ahash 0.8.6", "arrow", @@ -2591,16 +2609,26 @@ dependencies = [ "arrow-schema", "chrono", "half 2.3.1", + "instant", + "libc", "num_cpus", "object_store", "parquet", - "sqlparser 0.38.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "datafusion-common-runtime" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" +dependencies = [ + "tokio", ] [[package]] name = "datafusion-execution" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "arrow", "chrono", @@ -2619,61 +2647,40 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "ahash 0.8.6", "arrow", "arrow-array", - "datafusion-common", - "sqlparser 0.38.0 (registry+https://github.com/rust-lang/crates.io-index)", - "strum 0.25.0", - "strum_macros 0.25.3", -] - 
-[[package]] -name = "datafusion-optimizer" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" -dependencies = [ - "arrow", - "async-trait", "chrono", "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "hashbrown 0.14.3", - "itertools 0.11.0", - "log", - "regex-syntax 0.8.2", + "paste", + "serde_json", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", + "strum 0.26.2", + "strum_macros 0.26.2", ] [[package]] -name = "datafusion-physical-expr" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +name = "datafusion-functions" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ - "ahash 0.8.6", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "base64 0.21.5", + "base64 0.22.0", "blake2", "blake3", "chrono", "datafusion-common", + "datafusion-execution", "datafusion-expr", - "half 2.3.1", + "datafusion-physical-expr", "hashbrown 0.14.3", "hex", - "indexmap 2.1.0", - "itertools 0.11.0", - "libc", + "itertools 0.12.0", "log", "md-5", - "paste", - "petgraph", "rand", "regex", "sha2", @@ -2682,62 +2689,156 @@ dependencies = [ ] [[package]] -name = "datafusion-physical-plan" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +name = "datafusion-functions-aggregate" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-array" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "itertools 0.12.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-optimizer" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.3", + "itertools 0.12.0", + "log", + "regex-syntax 0.8.2", +] + +[[package]] +name = "datafusion-physical-expr" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "ahash 0.8.6", "arrow", "arrow-array", "arrow-buffer", + "arrow-ord", "arrow-schema", - "async-trait", + "arrow-string", + "base64 0.22.0", "chrono", "datafusion-common", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "half 2.3.1", + "hashbrown 0.14.3", + "hex", + "indexmap 2.2.6", + "itertools 0.12.0", + 
"log", + "paste", + "petgraph", + "regex", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", +] + +[[package]] +name = "datafusion-physical-plan" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" +dependencies = [ + "ahash 0.8.6", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", "datafusion-physical-expr", + "datafusion-physical-expr-common", "futures", "half 2.3.1", "hashbrown 0.14.3", - "indexmap 2.1.0", - "itertools 0.11.0", + "indexmap 2.2.6", + "itertools 0.12.0", "log", "once_cell", "parking_lot 0.12.1", "pin-project-lite", "rand", "tokio", - "uuid", ] [[package]] name = "datafusion-sql" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "arrow", + "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", "log", - "sqlparser 0.38.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", + "strum 0.26.2", ] [[package]] name = "datafusion-substrait" -version = "32.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=26e43acac3a96cec8dd4c8365f22dfb1a84306e9#26e43acac3a96cec8dd4c8365f22dfb1a84306e9" +version = "37.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=34eda15b73a9e278af8844b30ed2f1c21c10359c#34eda15b73a9e278af8844b30ed2f1c21c10359c" dependencies = [ "async-recursion", "chrono", "datafusion", - "itertools 0.11.0", + "itertools 0.12.0", "object_store", "prost 0.12.3", "prost-types 0.12.3", - "substrait 0.17.1", - "tokio", + "substrait 0.30.0", ] [[package]] @@ -2794,7 +2895,7 @@ dependencies = [ "table", "tokio", "toml 0.8.8", - "tonic 0.10.2", + "tonic 0.11.0", ] [[package]] @@ -2881,7 +2982,7 @@ checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -2892,7 +2993,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3147,7 +3248,7 @@ checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3159,7 +3260,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3208,15 +3309,14 @@ dependencies = [ [[package]] name = "etcd-client" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5231ad671c74ee5dc02753a0a9c855fe6e90de2a07acb2582f8a702470e04d1" +version = "0.12.4" +source = 
"git+https://github.com/MichaelScofield/etcd-client.git?rev=4c371e9b3ea8e0a8ee2f9cbd7ded26e54a45df3b#4c371e9b3ea8e0a8ee2f9cbd7ded26e54a45df3b" dependencies = [ "http", "prost 0.12.3", "tokio", "tokio-stream", - "tonic 0.10.2", + "tonic 0.11.0", "tonic-build 0.10.2", "tower", "tower-service", @@ -3301,6 +3401,7 @@ dependencies = [ "common-test-util", "common-time", "datafusion", + "datafusion-expr", "datatypes", "futures", "object-store", @@ -3416,7 +3517,7 @@ dependencies = [ "substrait 0.7.2", "table", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", ] [[package]] @@ -3480,7 +3581,7 @@ dependencies = [ "log-store", "meta-client", "meta-srv", - "opentelemetry-proto 0.3.0", + "opentelemetry-proto 0.5.0", "operator", "partition", "prometheus", @@ -3493,13 +3594,13 @@ dependencies = [ "session", "snafu", "sql", - "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", + "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", "store-api", "strfmt", "table", "tokio", "toml 0.8.8", - "tonic 0.10.2", + "tonic 0.11.0", "tower", "uuid", ] @@ -3529,7 +3630,7 @@ checksum = "b0fa992f1656e1707946bbba340ad244f0814009ef8c0118eb7b658395f19a2e" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3541,7 +3642,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3553,7 +3654,7 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3654,7 +3755,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -3755,9 +3856,9 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "git2" -version = "0.18.1" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" dependencies = [ "bitflags 2.4.1", "libc", @@ -3775,22 +3876,22 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=04d78b6e025ceb518040fdd10858c2a9d9345820#04d78b6e025ceb518040fdd10858c2a9d9345820" +source = "git+https://github.com/MichaelScofield/greptime-proto.git?rev=bdbd4cfa871ec8d192d3dbabf11debcb2cb67748#bdbd4cfa871ec8d192d3dbabf11debcb2cb67748" dependencies = [ "prost 0.12.3", "serde", "serde_json", "strum 0.25.0", "strum_macros 0.25.3", - "tonic 0.10.2", - "tonic-build 0.10.2", + "tonic 0.11.0", + "tonic-build 0.11.0", ] [[package]] name = "h2" -version = "0.3.26" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -3798,7 +3899,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 2.1.0", + "indexmap 2.2.6", "slab", "tokio", "tokio-util", @@ -3905,6 +4006,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -3981,9 +4088,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", @@ -4092,7 +4199,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -4107,7 +4214,7 @@ dependencies = [ "rust-sitter", "rust-sitter-tool", "slotmap", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -4126,7 +4233,7 @@ dependencies = [ "serde", "serde_json", "slotmap", - "syn 2.0.43", + "syn 2.0.55", "webbrowser", ] @@ -4140,7 +4247,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -4297,9 +4404,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.1.0" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -4321,9 +4428,9 @@ dependencies = [ [[package]] name = "indoc" -version = "1.0.9" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "inferno" @@ -4332,7 +4439,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" dependencies = [ "ahash 0.8.6", - "indexmap 2.1.0", + "indexmap 2.2.6", "is-terminal", "itoa", "log", @@ -4579,7 +4686,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee7893dab2e44ae5f9d0173f26ff4aa327c10b01b06a72b52dd9405b628640d" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.6", ] [[package]] @@ -4961,6 +5068,15 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lz4_flex" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +dependencies = [ + "twox-hash", +] + [[package]] name = "lzma-sys" version = "0.1.20" @@ -5061,9 +5177,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memcomparable" @@ -5141,7 +5257,7 @@ dependencies = [ "snafu", "tokio", "tokio-stream", - "tonic 0.10.2", + "tonic 0.11.0", "tower", "tracing", "tracing-subscriber", @@ -5195,7 +5311,7 @@ dependencies = [ "tokio", "tokio-stream", "toml 0.8.8", - "tonic 0.10.2", + "tonic 0.11.0", "tower", "tracing", "tracing-subscriber", @@ -5434,13 +5550,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f" dependencies = [ "darling 0.20.3", - "heck", + "heck 
0.4.1", "num-bigint", "proc-macro-crate 1.3.1", "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", "termcolor", "thiserror", ] @@ -5452,13 +5568,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c60492b5eb751e55b42d716b6b26dceb66767996cd7a5560a842fbf613ca2e92" dependencies = [ "darling 0.20.3", - "heck", + "heck 0.4.1", "num-bigint", "proc-macro-crate 3.1.0", "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", "termcolor", "thiserror", ] @@ -5782,7 +5898,7 @@ checksum = "cfb77679af88f8b125209d354a202862602672222e7f2313fdd6dc349bad4712" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -5923,16 +6039,16 @@ dependencies = [ [[package]] name = "object_store" -version = "0.7.1" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" +checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" dependencies = [ "async-trait", "bytes", "chrono", "futures", "humantime", - "itertools 0.11.0", + "itertools 0.12.0", "parking_lot 0.12.1", "percent-encoding", "snafu", @@ -6026,7 +6142,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a" dependencies = [ "futures-core", "futures-sink", - "indexmap 2.1.0", + "indexmap 2.2.6", "js-sys", "once_cell", "pin-project-lite", @@ -6036,12 +6152,12 @@ dependencies = [ [[package]] name = "opentelemetry" -version = "0.21.0" -source = "git+https://github.com/waynexia/opentelemetry-rust.git?rev=33841b38dda79b15f2024952be5f32533325ca02#33841b38dda79b15f2024952be5f32533325ca02" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" dependencies = [ "futures-core", "futures-sink", - "indexmap 2.1.0", "js-sys", "once_cell", "pin-project-lite", @@ -6058,7 +6174,7 @@ dependencies = [ "async-trait", "futures-core", "http", - "opentelemetry 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "opentelemetry 0.21.0", "opentelemetry-proto 0.4.0", "opentelemetry-semantic-conventions", "opentelemetry_sdk 0.21.1", @@ -6068,55 +6184,37 @@ dependencies = [ "tonic 0.9.2", ] -[[package]] -name = "opentelemetry-proto" -version = "0.3.0" -source = "git+https://github.com/waynexia/opentelemetry-rust.git?rev=33841b38dda79b15f2024952be5f32533325ca02#33841b38dda79b15f2024952be5f32533325ca02" -dependencies = [ - "opentelemetry 0.21.0 (git+https://github.com/waynexia/opentelemetry-rust.git?rev=33841b38dda79b15f2024952be5f32533325ca02)", - "opentelemetry_sdk 0.20.0", - "prost 0.12.3", - "tonic 0.10.2", -] - [[package]] name = "opentelemetry-proto" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2e155ce5cc812ea3d1dffbd1539aed653de4bf4882d60e6e04dcf0901d674e1" dependencies = [ - "opentelemetry 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "opentelemetry 0.21.0", "opentelemetry_sdk 0.21.1", "prost 0.11.9", "tonic 0.9.2", ] +[[package]] +name = "opentelemetry-proto" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8fddc9b68f5b80dae9d6f510b88e02396f006ad48cac349411fbecc80caae4" +dependencies = [ + "opentelemetry 0.22.0", + "opentelemetry_sdk 0.22.1", + "prost 0.12.3", + "tonic 0.11.0", +] + [[package]] name = "opentelemetry-semantic-conventions" version = "0.13.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f5774f1ef1f982ef2a447f6ee04ec383981a3ab99c8e77a1a7b30182e65bbc84" dependencies = [ - "opentelemetry 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "opentelemetry_sdk" -version = "0.20.0" -source = "git+https://github.com/waynexia/opentelemetry-rust.git?rev=33841b38dda79b15f2024952be5f32533325ca02#33841b38dda79b15f2024952be5f32533325ca02" -dependencies = [ - "async-trait", - "crossbeam-channel", - "futures-channel", - "futures-executor", - "futures-util", - "glob", - "once_cell", - "opentelemetry 0.21.0 (git+https://github.com/waynexia/opentelemetry-rust.git?rev=33841b38dda79b15f2024952be5f32533325ca02)", - "ordered-float 4.2.0", - "percent-encoding", - "rand", - "thiserror", + "opentelemetry 0.21.0", ] [[package]] @@ -6132,7 +6230,7 @@ dependencies = [ "futures-util", "glob", "once_cell", - "opentelemetry 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "opentelemetry 0.21.0", "ordered-float 4.2.0", "percent-encoding", "rand", @@ -6141,6 +6239,26 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "opentelemetry_sdk" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e90c7113be649e31e9a0f8b5ee24ed7a16923b322c3c5ab6367469c049d6b7e" +dependencies = [ + "async-trait", + "crossbeam-channel", + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "once_cell", + "opentelemetry 0.22.0", + "ordered-float 4.2.0", + "percent-encoding", + "rand", + "thiserror", +] + [[package]] name = "operator" version = "0.7.2" @@ -6186,11 +6304,11 @@ dependencies = [ "session", "snafu", "sql", - "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", + "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", "store-api", "table", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", ] [[package]] @@ -6202,8 +6320,7 @@ checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" [[package]] name = "orc-rust" version = "0.2.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900310981898f6e3877286f1272b75f5c4a604628594a0a7026311b93a2aa5e6" +source = "git+https://github.com/MichaelScofield/orc-rs.git?rev=17347f5f084ac937863317df882218055c4ea8c1#17347f5f084ac937863317df882218055c4ea8c1" dependencies = [ "arrow", "bytes", @@ -6375,9 +6492,9 @@ dependencies = [ [[package]] name = "parquet" -version = "47.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d" +checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" dependencies = [ "ahash 0.8.6", "arrow-array", @@ -6387,14 +6504,15 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64 0.21.5", + "base64 0.22.0", "brotli", "bytes", "chrono", "flate2", "futures", + "half 2.3.1", "hashbrown 0.14.3", - "lz4", + "lz4_flex 0.11.2", "num", "num-bigint", "object_store", @@ -6404,7 +6522,7 @@ dependencies = [ "thrift", "tokio", "twox-hash", - "zstd 0.12.4", + "zstd 0.13.0", ] [[package]] @@ -6434,7 +6552,7 @@ dependencies = [ "serde_json", "snafu", "sql", - "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", + "sqlparser 0.44.0 
(git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", "store-api", "table", ] @@ -6544,7 +6662,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -6565,7 +6683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.1.0", + "indexmap 2.2.6", ] [[package]] @@ -6659,7 +6777,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -6929,7 +7047,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -6986,9 +7104,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.71" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -7051,9 +7169,12 @@ dependencies = [ "common-recordbatch", "common-telemetry", "datafusion", + "datafusion-expr", + "datafusion-functions", "datatypes", "futures", "greptime-proto", + "itertools 0.10.5", "lazy_static", "prometheus", "promql-parser", @@ -7105,7 +7226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", - "heck", + "heck 0.4.1", "itertools 0.10.5", "lazy_static", "log", @@ -7127,7 +7248,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" dependencies = [ "bytes", - "heck", + "heck 0.4.1", "itertools 0.11.0", "log", "multimap", @@ -7137,7 +7258,7 @@ dependencies = [ "prost 0.12.3", "prost-types 0.12.3", "regex", - "syn 2.0.43", + "syn 2.0.55", "tempfile", "which", ] @@ -7165,7 +7286,7 @@ dependencies = [ "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -7292,15 +7413,16 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" +checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" dependencies = [ "cfg-if 1.0.0", "indoc", "libc", "memoffset 0.9.0", "parking_lot 0.12.1", + "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", @@ -7309,9 +7431,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" +checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" dependencies = [ "once_cell", "target-lexicon", @@ -7319,9 +7441,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" +checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" 
dependencies = [ "libc", "pyo3-build-config", @@ -7329,25 +7451,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" +checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 1.0.109", + "syn 2.0.55", ] [[package]] name = "pyo3-macros-backend" -version = "0.19.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" +checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" dependencies = [ + "heck 0.4.1", "proc-macro2", + "pyo3-build-config", "quote", - "syn 1.0.109", + "syn 2.0.55", ] [[package]] @@ -7452,9 +7576,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -7637,7 +7761,7 @@ checksum = "2566c4bf6845f2c2e83b27043c3f5dfcd5ba8f2937d6c00dc009bfb51a079dc4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -7678,12 +7802,6 @@ version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - [[package]] name = "regex-syntax" version = "0.8.2" @@ -7700,6 +7818,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "regress" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f5f39ba4513916c1b2657b72af6ec671f091cd637992f58d0ede5cae4e5dea0" +dependencies = [ + "hashbrown 0.14.3", + "memchr", +] + [[package]] name = "rend" version = "0.4.1" @@ -8019,7 +8147,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.43", + "syn 2.0.55", "walkdir", ] @@ -8332,7 +8460,7 @@ dependencies = [ "bitflags 1.3.2", "bstr", "itertools 0.10.5", - "lz4_flex", + "lz4_flex 0.9.5", "num-bigint", "num-complex", ] @@ -8754,6 +8882,7 @@ dependencies = [ "datafusion", "datafusion-common", "datafusion-expr", + "datafusion-functions", "datafusion-physical-expr", "datatypes", "futures", @@ -8817,10 +8946,10 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -8858,9 +8987,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" dependencies = [ "serde", ] @@ -8873,22 +9002,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.193" +version = "1.0.197" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -8904,11 +9033,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.6", "itoa", "ryu", "serde", @@ -8932,7 +9061,7 @@ checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -8953,7 +9082,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -8978,7 +9107,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.1.0", + "indexmap 2.2.6", "serde", "serde_json", "serde_with_macros", @@ -8994,16 +9123,16 @@ dependencies = [ "darling 0.20.3", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] name = "serde_yaml" -version = "0.9.29" +version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15e0ef66bf939a7c890a0bf6d5a733c70202225f9888a89ed5c62298b019129" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.6", "itoa", "ryu", "serde", @@ -9068,7 +9197,7 @@ dependencies = [ "once_cell", "openmetrics-parser", "opensrv-mysql", - "opentelemetry-proto 0.3.0", + "opentelemetry-proto 0.5.0", "parking_lot 0.12.1", "permutation", "pgwire", @@ -9105,7 +9234,7 @@ dependencies = [ "tokio-rustls 0.25.0", "tokio-stream", "tokio-test", - "tonic 0.10.2", + "tonic 0.11.0", "tonic-reflection", "tower", "tower-http", @@ -9309,7 +9438,7 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "syn 1.0.109", @@ -9410,8 +9539,8 @@ dependencies = [ "lazy_static", "regex", "snafu", - "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", - "sqlparser_derive 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", + "sqlparser_derive 0.1.1", "table", ] @@ -9461,24 +9590,24 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "aaf9c7ff146298ffda83a200f8d5084f08dcee1edfc135fcc1d646a45d50ffd6" dependencies = [ "log", - "sqlparser_derive 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + 
"sqlparser_derive 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "sqlparser" -version = "0.38.0" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef#6a93567ae38d42be5c8d08b13c8ff4dde26502ef" +version = "0.44.0" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0#c919990bf62ad38d2b0c0a3bc90b26ad919d51b0" dependencies = [ "lazy_static", "log", "regex", - "sqlparser 0.38.0 (registry+https://github.com/rust-lang/crates.io-index)", - "sqlparser_derive 0.1.1 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", + "sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", ] [[package]] @@ -9494,12 +9623,23 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.1.1" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef#6a93567ae38d42be5c8d08b13c8ff4dde26502ef" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.55", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0#c919990bf62ad38d2b0c0a3bc90b26ad919d51b0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.55", ] [[package]] @@ -9519,7 +9659,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa8241483a83a3f33aa5fff7e7d9def398ff9990b2752b6c6112b83c6d246029" dependencies = [ "ahash 0.7.7", - "atoi", + "atoi 1.0.0", "base64 0.13.1", "bitflags 1.3.2", "byteorder", @@ -9578,7 +9718,7 @@ checksum = "9966e64ae989e7e575b19d7265cb79d7fc3cbbdf179835cb0d716f294c2049c9" dependencies = [ "dotenvy", "either", - "heck", + "heck 0.4.1", "once_cell", "proc-macro2", "quote", @@ -9748,13 +9888,22 @@ dependencies = [ "strum_macros 0.25.3", ] +[[package]] +name = "strum" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" +dependencies = [ + "strum_macros 0.26.2", +] + [[package]] name = "strum_macros" version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", @@ -9767,11 +9916,24 @@ version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn 2.0.43", + "syn 2.0.55", +] + +[[package]] +name = "strum_macros" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.55", ] [[package]] @@ -9792,7 +9954,9 @@ dependencies = [ "bytes", "catalog", "common-error", + "common-function", "common-macro", + "common-telemetry", "datafusion", "datafusion-common", 
"datafusion-expr", @@ -9800,6 +9964,7 @@ dependencies = [ "datatypes", "promql", "prost 0.12.3", + "session", "snafu", "substrait 0.17.1", "tokio", @@ -9812,7 +9977,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1e8440a1c9b95a7c9a00a19f78b980749e8c945eb880687a5d673cea83729c5" dependencies = [ "git2", - "heck", + "heck 0.4.1", "prettyplease 0.2.15", "prost 0.12.3", "prost-build 0.12.3", @@ -9822,8 +9987,29 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.43", - "typify", + "syn 2.0.55", + "typify 0.0.14", + "walkdir", +] + +[[package]] +name = "substrait" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba959c71b2a1a341a94e1f362615d7e5f1a4de9d25d82fceea8160f79f1e1dfb" +dependencies = [ + "heck 0.5.0", + "prettyplease 0.2.15", + "prost 0.12.3", + "prost-build 0.12.3", + "prost-types 0.12.3", + "schemars", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.55", + "typify 0.0.16", "walkdir", ] @@ -9869,9 +10055,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.43" +version = "2.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" +checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" dependencies = [ "proc-macro2", "quote", @@ -9906,7 +10092,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -10085,7 +10271,7 @@ dependencies = [ "serde_json", "snafu", "sql", - "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)", + "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)", "sqlx", "tokio", ] @@ -10128,7 +10314,7 @@ dependencies = [ "meta-srv", "mysql_async", "object-store", - "opentelemetry-proto 0.3.0", + "opentelemetry-proto 0.5.0", "operator", "partition", "paste", @@ -10151,7 +10337,7 @@ dependencies = [ "time", "tokio", "tokio-postgres", - "tonic 0.10.2", + "tonic 0.11.0", "tower", "uuid", "zstd 0.13.0", @@ -10180,22 +10366,22 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.51" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.51" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -10343,9 +10529,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.35.1" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", @@ -10379,7 +10565,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -10556,7 +10742,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.6", "toml_datetime", "winnow", ] @@ -10567,7 +10753,7 @@ version = "0.20.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.6", "toml_datetime", "winnow", ] @@ -10578,7 +10764,7 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.6", "serde", "serde_spanned", "toml_datetime", @@ -10615,9 +10801,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d560933a0de61cf715926b9cac824d4c883c2c43142f787595e48280c40a1d0e" +checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" dependencies = [ "async-stream", "async-trait", @@ -10632,10 +10818,10 @@ dependencies = [ "percent-encoding", "pin-project", "prost 0.12.3", - "rustls 0.21.10", - "rustls-pemfile 1.0.4", + "rustls-pemfile 2.0.0", + "rustls-pki-types", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls 0.25.0", "tokio-stream", "tower", "tower-layer", @@ -10666,20 +10852,33 @@ dependencies = [ "proc-macro2", "prost-build 0.12.3", "quote", - "syn 2.0.43", + "syn 2.0.55", +] + +[[package]] +name = "tonic-build" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" +dependencies = [ + "prettyplease 0.2.15", + "proc-macro2", + "prost-build 0.12.3", + "quote", + "syn 2.0.55", ] [[package]] name = "tonic-reflection" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fa37c513df1339d197f4ba21d28c918b9ef1ac1768265f11ecb6b7f1cba1b76" +checksum = "548c227bd5c0fae5925812c4ec6c66ffcfced23ea370cb823f4d18f0fc1cb6a7" dependencies = [ "prost 0.12.3", "prost-types 0.12.3", "tokio", "tokio-stream", - "tonic 0.10.2", + "tonic 0.11.0", ] [[package]] @@ -10777,7 +10976,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -10820,7 +11019,7 @@ checksum = "c67ac25c5407e7b961fafc6f7e9aa5958fd297aada2d20fa2ae1737357e55596" dependencies = [ "js-sys", "once_cell", - "opentelemetry 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", + "opentelemetry 0.21.0", "opentelemetry_sdk 0.21.1", "smallvec", "tracing", @@ -11018,7 +11217,7 @@ checksum = "2eea6765137e2414c44c7b1e07c73965a118a72c46148e1e168b3fc9d3ccf3aa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -11027,8 +11226,18 @@ version = "0.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2e3b707a653e2915a2fc2c4ee96a3d30b9554b9435eb4cc8b5c6c74bbdd3044" dependencies = [ - "typify-impl", - "typify-macro", + "typify-impl 0.0.14", + "typify-macro 0.0.14", +] + +[[package]] +name = "typify" +version = "0.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c61e9db210bbff218e6535c664b37ec47da449169b98e7866d0580d0db75529" +dependencies = [ + "typify-impl 0.0.16", + "typify-macro 0.0.16", ] [[package]] @@ -11037,14 +11246,32 @@ version = "0.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d9c752192779f666e4c868672dee56a652b82c08032c7e9d23f6a845b282298" dependencies = [ - "heck", + "heck 0.4.1", "log", "proc-macro2", "quote", - "regress", + "regress 0.7.1", "schemars", "serde_json", - "syn 2.0.43", + "syn 2.0.55", + "thiserror", + "unicode-ident", +] + +[[package]] +name = "typify-impl" +version = "0.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95e32f38493804f88e2dc7a5412eccd872ea5452b4db9b0a77de4df180f2a87e" +dependencies = [ + "heck 0.4.1", + "log", + "proc-macro2", + "quote", + "regress 0.8.0", + "schemars", + "serde_json", + "syn 2.0.55", "thiserror", "unicode-ident", ] @@ -11061,8 +11288,24 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.43", - "typify-impl", + "syn 2.0.55", + "typify-impl 0.0.14", +] + +[[package]] +name = "typify-macro" +version = "0.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc09508b72f63d521d68e42c7f172c7416d67986df44b3c7d1f7f9963948ed32" +dependencies = [ + "proc-macro2", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.55", + "typify-impl 0.0.16", ] [[package]] @@ -11272,15 +11515,15 @@ dependencies = [ [[package]] name = "unindent" -version = "0.1.11" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "unsafe-libyaml" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" [[package]] name = "untrusted" @@ -11325,9 +11568,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ "atomic", "getrandom", @@ -11338,13 +11581,13 @@ dependencies = [ [[package]] name = "uuid-macro-internal" -version = "1.6.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49e7f3f3db8040a100710a11932239fd30697115e2ba4107080d8252939845e" +checksum = "9881bea7cbe687e36c9ab3b778c36cd0487402e270304e8b1296d5085303c1a2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -11413,9 +11656,9 @@ checksum = "f8e76fae08f03f96e166d2dfda232190638c10e0383841252416f9cfe2ae60e6" [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -11436,12 +11679,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "wasite" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" - [[package]] name = "wasm-bindgen" version = "0.2.89" @@ -11463,7 +11700,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", "wasm-bindgen-shared", ] @@ -11497,7 +11734,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -11597,12 +11834,11 @@ dependencies = [ [[package]] name = "whoami" -version = "1.5.1" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50" dependencies = [ - "redox_syscall 0.4.1", - "wasite", + "wasm-bindgen", "web-sys", ] @@ -12019,7 +12255,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] @@ -12039,7 +12275,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.43", + "syn 2.0.55", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 788bc68798..06bdbc1808 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,13 +73,18 @@ clippy.implicit_clone = "warn" rust.unknown_lints = "deny" [workspace.dependencies] +# We turn off default-features for some dependencies here so the workspaces which inherit them can +# selectively turn them on if needed, since we can override default-features = true (from false) +# for the inherited dependency but cannot do the reverse (override from true to false). 
+# +# See for more details: https://github.com/rust-lang/cargo/issues/11329 ahash = { version = "0.8", features = ["compile-time-rng"] } aquamarine = "0.3" -arrow = { version = "47.0" } -arrow-array = "47.0" -arrow-flight = "47.0" -arrow-ipc = { version = "47.0", features = ["lz4"] } -arrow-schema = { version = "47.0", features = ["serde"] } +arrow = { version = "51.0.0", features = ["prettyprint"] } +arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] } +arrow-flight = "51.0" +arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4"] } +arrow-schema = { version = "51.0", features = ["serde"] } async-stream = "0.3" async-trait = "0.1" axum = { version = "0.6", features = ["headers"] } @@ -91,20 +96,22 @@ bytes = { version = "1.5", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] } clap = { version = "4.4", features = ["derive"] } dashmap = "5.4" -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } -datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } -datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-functions = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } +datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" } derive_builder = "0.12" dotenv = "0.15" -etcd-client = "0.12" +# TODO(LFC): Wait for https://github.com/etcdv3/etcd-client/pull/76 +etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev = "4c371e9b3ea8e0a8ee2f9cbd7ded26e54a45df3b" } fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "04d78b6e025ceb518040fdd10858c2a9d9345820" } +greptime-proto = { git = "https://github.com/MichaelScofield/greptime-proto.git", rev = "bdbd4cfa871ec8d192d3dbabf11debcb2cb67748" } humantime = "2.1" humantime-serde = "1.1" itertools = "0.10" @@ -115,12 +122,12 @@
moka = "0.12" notify = "6.1" num_cpus = "1.16" once_cell = "1.18" -opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [ +opentelemetry-proto = { version = "0.5", features = [ "gen-tonic", "metrics", "trace", ] } -parquet = "47.0" +parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] } paste = "1.0" pin-project = "1.0" prometheus = { version = "0.13.3", features = ["process"] } @@ -144,18 +151,18 @@ serde_with = "3" smallvec = { version = "1", features = ["serde"] } snafu = "0.7" sysinfo = "0.30" -# on branch v0.38.x -sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [ +# on branch v0.44.x +sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "c919990bf62ad38d2b0c0a3bc90b26ad919d51b0", features = [ "visitor", ] } strum = { version = "0.25", features = ["derive"] } tempfile = "3" -tokio = { version = "1.28", features = ["full"] } +tokio = { version = "1.36", features = ["full"] } tokio-stream = { version = "0.1" } tokio-util = { version = "0.7", features = ["io-util", "compat"] } toml = "0.8.8" -tonic = { version = "0.10", features = ["tls"] } -uuid = { version = "1", features = ["serde", "v4", "fast-rng"] } +tonic = { version = "0.11", features = ["tls"] } +uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] } zstd = "0.13" ## workspaces members diff --git a/benchmarks/src/bin/nyc-taxi.rs b/benchmarks/src/bin/nyc-taxi.rs index 1e60db69fa..bfc26f3dae 100644 --- a/benchmarks/src/bin/nyc-taxi.rs +++ b/benchmarks/src/bin/nyc-taxi.rs @@ -215,37 +215,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) { ColumnDataType::String, ) } - DataType::Null - | DataType::Boolean - | DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 - | DataType::Float16 - | DataType::Float32 - | DataType::Date32 - | DataType::Date64 - | DataType::Time32(_) - | DataType::Time64(_) - | DataType::Duration(_) - | DataType::Interval(_) - | DataType::Binary - | DataType::FixedSizeBinary(_) - | DataType::LargeBinary - | DataType::LargeUtf8 - | DataType::List(_) - | DataType::FixedSizeList(_, _) - | DataType::LargeList(_) - | DataType::Struct(_) - | DataType::Union(_, _) - | DataType::Dictionary(_, _) - | DataType::Decimal128(_, _) - | DataType::Decimal256(_, _) - | DataType::RunEndEncoded(_, _) - | DataType::Map(_, _) => todo!(), + _ => unimplemented!(), } } @@ -444,7 +414,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr { fn query_set(table_name: &str) -> HashMap { HashMap::from([ ( - "count_all".to_string(), + "count_all".to_string(), format!("SELECT COUNT(*) FROM {table_name};"), ), ( diff --git a/src/catalog/src/table_source.rs b/src/catalog/src/table_source.rs index d5d0c282e6..58813a460e 100644 --- a/src/catalog/src/table_source.rs +++ b/src/catalog/src/table_source.rs @@ -49,10 +49,7 @@ impl DfTableSourceProvider { } } - pub fn resolve_table_ref<'a>( - &'a self, - table_ref: TableReference<'a>, - ) -> Result> { + pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result { if self.disallow_cross_catalog_query { match &table_ref { TableReference::Bare { .. 
} => (), @@ -76,7 +73,7 @@ impl DfTableSourceProvider { pub async fn resolve_table( &mut self, - table_ref: TableReference<'_>, + table_ref: TableReference, ) -> Result> { let table_ref = self.resolve_table_ref(table_ref)?; @@ -106,8 +103,6 @@ impl DfTableSourceProvider { #[cfg(test)] mod tests { - use std::borrow::Cow; - use session::context::QueryContext; use super::*; @@ -120,68 +115,37 @@ mod tests { let table_provider = DfTableSourceProvider::new(MemoryCatalogManager::with_default_setup(), true, query_ctx); - let table_ref = TableReference::Bare { - table: Cow::Borrowed("table_name"), - }; + let table_ref = TableReference::bare("table_name"); let result = table_provider.resolve_table_ref(table_ref); assert!(result.is_ok()); - let table_ref = TableReference::Partial { - schema: Cow::Borrowed("public"), - table: Cow::Borrowed("table_name"), - }; + let table_ref = TableReference::partial("public", "table_name"); let result = table_provider.resolve_table_ref(table_ref); assert!(result.is_ok()); - let table_ref = TableReference::Partial { - schema: Cow::Borrowed("wrong_schema"), - table: Cow::Borrowed("table_name"), - }; + let table_ref = TableReference::partial("wrong_schema", "table_name"); let result = table_provider.resolve_table_ref(table_ref); assert!(result.is_ok()); - let table_ref = TableReference::Full { - catalog: Cow::Borrowed("greptime"), - schema: Cow::Borrowed("public"), - table: Cow::Borrowed("table_name"), - }; + let table_ref = TableReference::full("greptime", "public", "table_name"); let result = table_provider.resolve_table_ref(table_ref); assert!(result.is_ok()); - let table_ref = TableReference::Full { - catalog: Cow::Borrowed("wrong_catalog"), - schema: Cow::Borrowed("public"), - table: Cow::Borrowed("table_name"), - }; + let table_ref = TableReference::full("wrong_catalog", "public", "table_name"); let result = table_provider.resolve_table_ref(table_ref); assert!(result.is_err()); - let table_ref = TableReference::Partial { - schema: Cow::Borrowed("information_schema"), - table: Cow::Borrowed("columns"), - }; + let table_ref = TableReference::partial("information_schema", "columns"); let result = table_provider.resolve_table_ref(table_ref); assert!(result.is_ok()); - let table_ref = TableReference::Full { - catalog: Cow::Borrowed("greptime"), - schema: Cow::Borrowed("information_schema"), - table: Cow::Borrowed("columns"), - }; + let table_ref = TableReference::full("greptime", "information_schema", "columns"); assert!(table_provider.resolve_table_ref(table_ref).is_ok()); - let table_ref = TableReference::Full { - catalog: Cow::Borrowed("dummy"), - schema: Cow::Borrowed("information_schema"), - table: Cow::Borrowed("columns"), - }; + let table_ref = TableReference::full("dummy", "information_schema", "columns"); assert!(table_provider.resolve_table_ref(table_ref).is_err()); - let table_ref = TableReference::Full { - catalog: Cow::Borrowed("greptime"), - schema: Cow::Borrowed("greptime_private"), - table: Cow::Borrowed("columns"), - }; + let table_ref = TableReference::full("greptime", "greptime_private", "columns"); assert!(table_provider.resolve_table_ref(table_ref).is_ok()); } } diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 8f11043afd..1a4792dcb0 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -30,7 +30,7 @@ derive_builder.workspace = true futures.workspace = true lazy_static.workspace = true object-store.workspace = true -orc-rust = "0.2" +orc-rust = { git = 
"https://github.com/MichaelScofield/orc-rs.git", rev = "17347f5f084ac937863317df882218055c4ea8c1" } parquet.workspace = true paste = "1.0" regex = "1.7" diff --git a/src/common/datasource/src/file_format/csv.rs b/src/common/datasource/src/file_format/csv.rs index 0767722d9b..4cf2b9e133 100644 --- a/src/common/datasource/src/file_format/csv.rs +++ b/src/common/datasource/src/file_format/csv.rs @@ -117,7 +117,7 @@ impl CsvConfig { let mut builder = csv::ReaderBuilder::new(self.file_schema.clone()) .with_delimiter(self.delimiter) .with_batch_size(self.batch_size) - .has_header(self.has_header); + .with_header(self.has_header); if let Some(proj) = &self.file_projection { builder = builder.with_projection(proj.clone()); diff --git a/src/common/datasource/src/file_format/tests.rs b/src/common/datasource/src/file_format/tests.rs index be8650c9d5..7be8664b72 100644 --- a/src/common/datasource/src/file_format/tests.rs +++ b/src/common/datasource/src/file_format/tests.rs @@ -19,6 +19,7 @@ use std::vec; use common_test_util::find_workspace_path; use datafusion::assert_batches_eq; +use datafusion::config::TableParquetOptions; use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec}; use datafusion::execution::context::TaskContext; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; @@ -166,7 +167,7 @@ async fn test_parquet_exec() { .to_string(); let base_config = scan_config(schema.clone(), None, path); - let exec = ParquetExec::new(base_config, None, None) + let exec = ParquetExec::new(base_config, None, None, TableParquetOptions::default()) .with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store))); let ctx = SessionContext::new(); diff --git a/src/common/datasource/src/test_util.rs b/src/common/datasource/src/test_util.rs index 04125f1613..8f1af59c90 100644 --- a/src/common/datasource/src/test_util.rs +++ b/src/common/datasource/src/test_util.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use common_test_util::temp_dir::{create_temp_dir, TempDir}; +use datafusion::common::Statistics; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{FileScanConfig, FileStream}; @@ -72,17 +73,16 @@ pub fn test_basic_schema() -> SchemaRef { pub fn scan_config(file_schema: SchemaRef, limit: Option, filename: &str) -> FileScanConfig { // object_store only recognize the Unix style path, so make it happy. 
let filename = &filename.replace('\\', "/"); - + let statistics = Statistics::new_unknown(file_schema.as_ref()); FileScanConfig { object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used file_schema, file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]], - statistics: Default::default(), + statistics, projection: None, limit, table_partition_cols: vec![], output_ordering: vec![], - infinite_source: false, } } diff --git a/src/common/function/src/scalars/aggregate/diff.rs b/src/common/function/src/scalars/aggregate/diff.rs index 9893d6199b..b83ed6d004 100644 --- a/src/common/function/src/scalars/aggregate/diff.rs +++ b/src/common/function/src/scalars/aggregate/diff.rs @@ -56,7 +56,7 @@ where .map(|&n| n.into()) .collect::>(); Ok(vec![Value::List(ListValue::new( - Some(Box::new(nums)), + nums, I::LogicalType::build_data_type(), ))]) } @@ -120,10 +120,7 @@ where O::from_native(native).into() }) .collect::>(); - let diff = Value::List(ListValue::new( - Some(Box::new(diff)), - O::LogicalType::build_data_type(), - )); + let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type())); Ok(diff) } } @@ -218,10 +215,7 @@ mod test { let values = vec![Value::from(2_i64), Value::from(1_i64)]; diff.update_batch(&v).unwrap(); assert_eq!( - Value::List(ListValue::new( - Some(Box::new(values)), - ConcreteDataType::int64_datatype() - )), + Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())), diff.evaluate().unwrap() ); @@ -236,10 +230,7 @@ mod test { let values = vec![Value::from(5_i64), Value::from(1_i64)]; diff.update_batch(&v).unwrap(); assert_eq!( - Value::List(ListValue::new( - Some(Box::new(values)), - ConcreteDataType::int64_datatype() - )), + Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())), diff.evaluate().unwrap() ); @@ -252,10 +243,7 @@ mod test { let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)]; diff.update_batch(&v).unwrap(); assert_eq!( - Value::List(ListValue::new( - Some(Box::new(values)), - ConcreteDataType::int64_datatype() - )), + Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())), diff.evaluate().unwrap() ); } diff --git a/src/common/function/src/scalars/aggregate/percentile.rs b/src/common/function/src/scalars/aggregate/percentile.rs index 49b981a7ee..231e0bf43a 100644 --- a/src/common/function/src/scalars/aggregate/percentile.rs +++ b/src/common/function/src/scalars/aggregate/percentile.rs @@ -104,10 +104,7 @@ where .map(|&n| n.into()) .collect::>(); Ok(vec![ - Value::List(ListValue::new( - Some(Box::new(nums)), - T::LogicalType::build_data_type(), - )), + Value::List(ListValue::new(nums, T::LogicalType::build_data_type())), self.p.into(), ]) } diff --git a/src/common/function/src/scalars/aggregate/polyval.rs b/src/common/function/src/scalars/aggregate/polyval.rs index b56a692c8d..ae6ca101c4 100644 --- a/src/common/function/src/scalars/aggregate/polyval.rs +++ b/src/common/function/src/scalars/aggregate/polyval.rs @@ -72,10 +72,7 @@ where .map(|&n| n.into()) .collect::>(); Ok(vec![ - Value::List(ListValue::new( - Some(Box::new(nums)), - T::LogicalType::build_data_type(), - )), + Value::List(ListValue::new(nums, T::LogicalType::build_data_type())), self.x.into(), ]) } diff --git a/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs b/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs index 2ec9540513..e6c92225a6 100644 --- a/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs +++ 
b/src/common/function/src/scalars/aggregate/scipy_stats_norm_cdf.rs @@ -56,10 +56,7 @@ where .map(|&x| x.into()) .collect::>(); Ok(vec![ - Value::List(ListValue::new( - Some(Box::new(nums)), - T::LogicalType::build_data_type(), - )), + Value::List(ListValue::new(nums, T::LogicalType::build_data_type())), self.x.into(), ]) } diff --git a/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs b/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs index d1bf432c99..3045ae8665 100644 --- a/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs +++ b/src/common/function/src/scalars/aggregate/scipy_stats_norm_pdf.rs @@ -56,10 +56,7 @@ where .map(|&x| x.into()) .collect::>(); Ok(vec![ - Value::List(ListValue::new( - Some(Box::new(nums)), - T::LogicalType::build_data_type(), - )), + Value::List(ListValue::new(nums, T::LogicalType::build_data_type())), self.x.into(), ]) } diff --git a/src/common/function/src/scalars/math.rs b/src/common/function/src/scalars/math.rs index f7d50f881d..6635e70b17 100644 --- a/src/common/function/src/scalars/math.rs +++ b/src/common/function/src/scalars/math.rs @@ -77,7 +77,7 @@ impl Function for RangeFunction { /// `range_fn` will never been used. As long as a legal signature is returned, the specific content of the signature does not matter. /// In fact, the arguments loaded by `range_fn` are very complicated, and it is difficult to use `Signature` to describe fn signature(&self) -> Signature { - Signature::any(0, Volatility::Immutable) + Signature::variadic_any(Volatility::Immutable) } fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result { diff --git a/src/common/macro/src/range_fn.rs b/src/common/macro/src/range_fn.rs index c907f1d0d1..582fff523d 100644 --- a/src/common/macro/src/range_fn.rs +++ b/src/common/macro/src/range_fn.rs @@ -119,15 +119,17 @@ fn build_struct( } pub fn scalar_udf() -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. + #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(Self::calc), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(Self::calc) as _), + ) } fn input_type() -> Vec { diff --git a/src/common/query/src/columnar_value.rs b/src/common/query/src/columnar_value.rs index 4e79d7819f..419bc5ed3a 100644 --- a/src/common/query/src/columnar_value.rs +++ b/src/common/query/src/columnar_value.rs @@ -17,7 +17,7 @@ use datatypes::prelude::ConcreteDataType; use datatypes::vectors::{Helper, VectorRef}; use snafu::ResultExt; -use crate::error::{self, IntoVectorSnafu, Result}; +use crate::error::{self, GeneralDataFusionSnafu, IntoVectorSnafu, Result}; use crate::prelude::ScalarValue; /// Represents the result from an expression @@ -43,7 +43,9 @@ impl ColumnarValue { Ok(match self { ColumnarValue::Vector(v) => v, ColumnarValue::Scalar(s) => { - let v = s.to_array_of_size(num_rows); + let v = s + .to_array_of_size(num_rows) + .context(GeneralDataFusionSnafu)?; let data_type = v.data_type().clone(); Helper::try_into_vector(v).context(IntoVectorSnafu { data_type })? 
} diff --git a/src/common/query/src/lib.rs b/src/common/query/src/lib.rs index cb39d37fb4..ca81ad9e41 100644 --- a/src/common/query/src/lib.rs +++ b/src/common/query/src/lib.rs @@ -94,8 +94,8 @@ impl Debug for OutputData { OutputData::RecordBatches(recordbatches) => { write!(f, "OutputData::RecordBatches({recordbatches:?})") } - OutputData::Stream(_) => { - write!(f, "OutputData::Stream()") + OutputData::Stream(s) => { + write!(f, "OutputData::Stream(<{}>)", s.name()) } } } diff --git a/src/common/query/src/logical_plan.rs b/src/common/query/src/logical_plan.rs index ac20c74b5b..ab5dedc14f 100644 --- a/src/common/query/src/logical_plan.rs +++ b/src/common/query/src/logical_plan.rs @@ -72,6 +72,7 @@ pub fn create_aggregate_function( mod tests { use std::sync::Arc; + use datafusion_common::DFSchema; use datafusion_expr::{ ColumnarValue as DfColumnarValue, ScalarUDF as DfScalarUDF, TypeSignature as DfTypeSignature, @@ -135,15 +136,17 @@ mod tests { // test into_df_udf let df_udf: DfScalarUDF = udf.into(); - assert_eq!("and", df_udf.name); + assert_eq!("and", df_udf.name()); let types = vec![DataType::Boolean, DataType::Boolean]; assert!( - matches!(&df_udf.signature.type_signature, DfTypeSignature::Exact(ts) if ts.clone() == types) + matches!(&df_udf.signature().type_signature, DfTypeSignature::Exact(ts) if ts.clone() == types) ); assert_eq!( - Arc::new(DataType::Boolean), - (df_udf.return_type)(&[]).unwrap() + DataType::Boolean, + df_udf + .return_type_from_exprs(&[], &DFSchema::empty(), &[]) + .unwrap() ); let args = vec![ @@ -152,7 +155,7 @@ mod tests { ]; // call the function - let result = (df_udf.fun)(&args).unwrap(); + let result = (df_udf.fun())(&args).unwrap(); match result { DfColumnarValue::Array(arr) => { diff --git a/src/common/query/src/logical_plan/accumulator.rs b/src/common/query/src/logical_plan/accumulator.rs index f0c272ada1..32f1b4587c 100644 --- a/src/common/query/src/logical_plan/accumulator.rs +++ b/src/common/query/src/logical_plan/accumulator.rs @@ -126,7 +126,7 @@ impl DfAccumulatorAdaptor { } impl DfAccumulator for DfAccumulatorAdaptor { - fn state(&self) -> DfResult> { + fn state(&mut self) -> DfResult> { let state_values = self.accumulator.state()?; let state_types = self.creator.state_types()?; if state_values.len() != state_types.len() { @@ -161,7 +161,7 @@ impl DfAccumulator for DfAccumulatorAdaptor { Ok(()) } - fn evaluate(&self) -> DfResult { + fn evaluate(&mut self) -> DfResult { let value = self.accumulator.evaluate()?; let output_type = self.creator.output_type()?; let scalar_value = value diff --git a/src/common/query/src/logical_plan/expr.rs b/src/common/query/src/logical_plan/expr.rs index e5abddc4d2..79f2363a6e 100644 --- a/src/common/query/src/logical_plan/expr.rs +++ b/src/common/query/src/logical_plan/expr.rs @@ -94,10 +94,10 @@ mod tests { #[test] fn test_from_df_expr() { - let df_expr = DfExpr::Wildcard; + let df_expr = DfExpr::Wildcard { qualifier: None }; let expr: Expr = df_expr.into(); - assert_eq!(DfExpr::Wildcard, *expr.df_expr()); + assert_eq!(DfExpr::Wildcard { qualifier: None }, *expr.df_expr()); } } diff --git a/src/common/query/src/logical_plan/udaf.rs b/src/common/query/src/logical_plan/udaf.rs index b96de5b888..b9a11cfbfb 100644 --- a/src/common/query/src/logical_plan/udaf.rs +++ b/src/common/query/src/logical_plan/udaf.rs @@ -16,15 +16,18 @@ //! //! Modified from DataFusion. 
+use std::any::Any; use std::fmt::{self, Debug, Formatter}; use std::sync::Arc; +use datafusion::arrow::datatypes::Field; +use datafusion_common::Result; +use datafusion_expr::function::AccumulatorArgs; use datafusion_expr::{ - AccumulatorFactoryFunction, AggregateUDF as DfAggregateUdf, - StateTypeFunction as DfStateTypeFunction, + Accumulator, AccumulatorFactoryFunction, AggregateUDF as DfAggregateUdf, AggregateUDFImpl, }; use datatypes::arrow::datatypes::DataType as ArrowDataType; -use datatypes::prelude::*; +use datatypes::data_type::DataType; use crate::function::{ to_df_return_type, AccumulatorFunctionImpl, ReturnTypeFunction, StateTypeFunction, @@ -90,13 +93,72 @@ impl AggregateFunction { impl From for DfAggregateUdf { fn from(udaf: AggregateFunction) -> Self { - DfAggregateUdf::new( - &udaf.name, - &udaf.signature.into(), - &to_df_return_type(udaf.return_type), - &to_df_accumulator_func(udaf.accumulator, udaf.creator.clone()), - &to_df_state_type(udaf.state_type), - ) + struct DfUdafAdapter { + name: String, + signature: datafusion_expr::Signature, + return_type_func: datafusion_expr::ReturnTypeFunction, + accumulator: AccumulatorFactoryFunction, + creator: AggregateFunctionCreatorRef, + } + + impl Debug for DfUdafAdapter { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("DfUdafAdapter") + .field("name", &self.name) + .field("signature", &self.signature) + .finish() + } + } + + impl AggregateUDFImpl for DfUdafAdapter { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + &self.name + } + + fn signature(&self) -> &datafusion_expr::Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[ArrowDataType]) -> Result { + (self.return_type_func)(arg_types).map(|x| x.as_ref().clone()) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + (self.accumulator)(acc_args) + } + + fn state_fields( + &self, + name: &str, + _value_type: ArrowDataType, + _ordering_fields: Vec, + ) -> Result> { + self.creator + .state_types() + .map(|x| { + (0..x.len()) + .zip(x) + .map(|(i, t)| { + Field::new(format!("{}_{}", name, i), t.as_arrow_type(), true) + }) + .collect::>() + }) + .map_err(|e| e.into()) + } + } + + DfUdafAdapter { + name: udaf.name, + signature: udaf.signature.into(), + return_type_func: to_df_return_type(udaf.return_type), + accumulator: to_df_accumulator_func(udaf.accumulator, udaf.creator.clone()), + creator: udaf.creator, + } + .into() } } @@ -110,19 +172,3 @@ fn to_df_accumulator_func( Ok(Box::new(DfAccumulatorAdaptor::new(accumulator, creator)) as _) }) } - -fn to_df_state_type(func: StateTypeFunction) -> DfStateTypeFunction { - let df_func = move |data_type: &ArrowDataType| { - // DataFusion DataType -> ConcreteDataType - let concrete_data_type = ConcreteDataType::from_arrow_type(data_type); - - // evaluate ConcreteDataType - let eval_result = (func)(&concrete_data_type); - - // ConcreteDataType -> DataFusion DataType - eval_result - .map(|ts| Arc::new(ts.iter().map(|t| t.as_arrow_type()).collect())) - .map_err(|e| e.into()) - }; - Arc::new(df_func) -} diff --git a/src/common/query/src/logical_plan/udf.rs b/src/common/query/src/logical_plan/udf.rs index 31d3561745..df5cec762c 100644 --- a/src/common/query/src/logical_plan/udf.rs +++ b/src/common/query/src/logical_plan/udf.rs @@ -70,6 +70,8 @@ impl ScalarUdf { impl From for DfScalarUDF { fn from(udf: ScalarUdf) -> Self { + // TODO(LFC): remove deprecated + #[allow(deprecated)] DfScalarUDF::new( &udf.name, &udf.signature.into(), diff --git 
a/src/common/query/src/physical_plan.rs b/src/common/query/src/physical_plan.rs index ddfb1bfb6c..919d104450 100644 --- a/src/common/query/src/physical_plan.rs +++ b/src/common/query/src/physical_plan.rs @@ -21,10 +21,9 @@ use common_recordbatch::{DfSendableRecordBatchStream, SendableRecordBatchStream} use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef; use datafusion::error::Result as DfResult; pub use datafusion::execution::context::{SessionContext, TaskContext}; -use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; pub use datafusion::physical_plan::Partitioning; -use datafusion::physical_plan::{DisplayAs, DisplayFormatType, Statistics}; +use datafusion::physical_plan::{DisplayAs, DisplayFormatType, PlanProperties}; use datatypes::schema::SchemaRef; use snafu::ResultExt; @@ -47,13 +46,9 @@ pub trait PhysicalPlan: Debug + Send + Sync { /// Get the schema for this physical plan fn schema(&self) -> SchemaRef; - /// Specifies the output partitioning scheme of this plan - fn output_partitioning(&self) -> Partitioning; - - /// returns `Some(keys)` that describes how the output was sorted. - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None - } + /// Return properties of the output of the [PhysicalPlan], such as output + /// ordering(s), partitioning information etc. + fn properties(&self) -> &PlanProperties; /// Get a list of child physical plans that provide the input for this plan. The returned list /// will be empty for leaf nodes, will contain a single value for unary nodes, or two @@ -107,8 +102,8 @@ impl PhysicalPlan for PhysicalPlanAdapter { self.schema.clone() } - fn output_partitioning(&self) -> Partitioning { - self.df_plan.output_partitioning() + fn properties(&self) -> &PlanProperties { + self.df_plan.properties() } fn children(&self) -> Vec { @@ -170,14 +165,6 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter { self.0.schema().arrow_schema().clone() } - fn output_partitioning(&self) -> Partitioning { - self.0.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.0.output_ordering() - } - fn children(&self) -> Vec> { self.0 .children() @@ -213,13 +200,13 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter { Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream))) } - fn statistics(&self) -> Statistics { - Statistics::default() - } - fn metrics(&self) -> Option { self.0.metrics() } + + fn properties(&self) -> &PlanProperties { + self.0.properties() + } } impl DisplayAs for DfPhysicalPlanAdapter { @@ -232,10 +219,12 @@ impl DisplayAs for DfPhysicalPlanAdapter { mod test { use async_trait::async_trait; use common_recordbatch::{RecordBatch, RecordBatches}; + use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef; use datafusion::datasource::{DefaultTableSource, TableProvider as DfTableProvider, TableType}; use datafusion::execution::context::{SessionContext, SessionState}; - use datafusion::physical_plan::collect; + use datafusion::physical_expr::EquivalenceProperties; use datafusion::physical_plan::empty::EmptyExec; + use datafusion::physical_plan::{collect, ExecutionMode}; use datafusion_expr::logical_plan::builder::LogicalPlanBuilder; use datafusion_expr::{Expr, TableSource}; use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; @@ -272,10 +261,13 @@ mod test { _filters: &[Expr], _limit: Option, ) -> DfResult> { - let schema = Schema::try_from(self.schema()).unwrap(); - let my_plan = 
Arc::new(MyExecutionPlan { - schema: Arc::new(schema), - }); + let schema = Arc::new(Schema::try_from(self.schema()).unwrap()); + let properties = PlanProperties::new( + EquivalenceProperties::new(schema.arrow_schema().clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); + let my_plan = Arc::new(MyExecutionPlan { schema, properties }); let df_plan = DfPhysicalPlanAdapter(my_plan); Ok(Arc::new(df_plan)) } @@ -289,9 +281,10 @@ mod test { } } - #[derive(Debug)] + #[derive(Debug, Clone)] struct MyExecutionPlan { schema: SchemaRef, + properties: PlanProperties, } impl PhysicalPlan for MyExecutionPlan { @@ -303,8 +296,8 @@ mod test { self.schema.clone() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) + fn properties(&self) -> &PlanProperties { + &self.properties } fn children(&self) -> Vec { @@ -312,7 +305,7 @@ mod test { } fn with_new_children(&self, _children: Vec) -> Result { - unimplemented!() + Ok(Arc::new(self.clone())) } fn execute( @@ -381,7 +374,7 @@ mod test { let plan = PhysicalPlanAdapter::new( Arc::new(Schema::try_from(df_schema.clone()).unwrap()), - Arc::new(EmptyExec::new(true, df_schema.clone())), + Arc::new(EmptyExec::new(df_schema.clone())), ); let _ = plan.df_plan.as_any().downcast_ref::().unwrap(); diff --git a/src/common/query/src/signature.rs b/src/common/query/src/signature.rs index 9e92a10e17..a234990bf6 100644 --- a/src/common/query/src/signature.rs +++ b/src/common/query/src/signature.rs @@ -31,6 +31,8 @@ pub enum TypeSignature { // A function such as `array` is `VariadicEqual` // The first argument decides the type used for coercion VariadicEqual, + /// One or more arguments with arbitrary types + VariadicAny, /// fixed number of arguments of an arbitrary but equal type out of a list of valid types // A function of one argument of f64 is `Uniform(1, vec![ConcreteDataType::Float64])` // A function of one argument of f64 or f32 is `Uniform(1, vec![ConcreteDataType::Float32, ConcreteDataType::Float64])` @@ -79,6 +81,15 @@ impl Signature { volatility, } } + + /// variadic_any - Creates a variadic signature that represents an arbitrary number of arguments of any type. + pub fn variadic_any(volatility: Volatility) -> Self { + Self { + type_signature: TypeSignature::VariadicAny, + volatility, + } + } + /// uniform - Creates a function with a fixed number of arguments of the same type, which must be from valid_types. pub fn uniform( arg_count: usize, @@ -131,6 +142,7 @@ impl From for DfTypeSignature { TypeSignature::OneOf(ts) => { DfTypeSignature::OneOf(ts.into_iter().map(Into::into).collect()) } + TypeSignature::VariadicAny => DfTypeSignature::VariadicAny, } } } diff --git a/src/common/recordbatch/Cargo.toml b/src/common/recordbatch/Cargo.toml index 12494cdcf5..d82b445c8e 100644 --- a/src/common/recordbatch/Cargo.toml +++ b/src/common/recordbatch/Cargo.toml @@ -11,6 +11,7 @@ workspace = true arc-swap = "1.6" common-error.workspace = true common-macro.workspace = true +common-telemetry.workspace = true datafusion.workspace = true datafusion-common.workspace = true datatypes.workspace = true diff --git a/src/common/recordbatch/src/adapter.rs b/src/common/recordbatch/src/adapter.rs index 12f5ecfdc9..5f5cc45abb 100644 --- a/src/common/recordbatch/src/adapter.rs +++ b/src/common/recordbatch/src/adapter.rs @@ -103,7 +103,7 @@ where "Trying to cast a RecordBatch into an incompatible schema. 
RecordBatch: {}, Target: {}", projected_column.schema(), projected_schema, - )))); + )), None)); } let mut columns = Vec::with_capacity(projected_schema.fields.len()); @@ -218,6 +218,10 @@ impl RecordBatchStreamAdapter { } impl RecordBatchStream for RecordBatchStreamAdapter { + fn name(&self) -> &str { + "RecordBatchStreamAdapter" + } + fn schema(&self) -> SchemaRef { self.schema.clone() } diff --git a/src/common/recordbatch/src/error.rs b/src/common/recordbatch/src/error.rs index 42a2754bb2..b22c0f488d 100644 --- a/src/common/recordbatch/src/error.rs +++ b/src/common/recordbatch/src/error.rs @@ -18,6 +18,7 @@ use std::any::Any; use common_error::ext::{BoxedError, ErrorExt}; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; +use datafusion_common::ScalarValue; use datatypes::prelude::ConcreteDataType; use snafu::{Location, Snafu}; @@ -69,8 +70,9 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to init Recordbatch stream"))] - InitRecordbatchStream { + #[snafu(display("Failed to convert {v:?} to Arrow scalar"))] + ToArrowScalar { + v: ScalarValue, #[snafu(source)] error: datafusion_common::DataFusionError, location: Location, @@ -128,7 +130,7 @@ impl ErrorExt for Error { | Error::CreateRecordBatches { .. } | Error::PollStream { .. } | Error::Format { .. } - | Error::InitRecordbatchStream { .. } + | Error::ToArrowScalar { .. } | Error::ColumnNotExists { .. } | Error::ProjectArrowRecordBatch { .. } | Error::ArrowCompute { .. } => StatusCode::Internal, diff --git a/src/common/recordbatch/src/filter.rs b/src/common/recordbatch/src/filter.rs index 3175ace37e..7a5e361138 100644 --- a/src/common/recordbatch/src/filter.rs +++ b/src/common/recordbatch/src/filter.rs @@ -22,7 +22,7 @@ use datafusion_common::ScalarValue; use datatypes::vectors::VectorRef; use snafu::ResultExt; -use crate::error::{ArrowComputeSnafu, Result, UnsupportedOperationSnafu}; +use crate::error::{ArrowComputeSnafu, Result, ToArrowScalarSnafu, UnsupportedOperationSnafu}; /// An inplace expr evaluator for simple filter. 
Only support /// - `col` `op` `literal` @@ -69,9 +69,10 @@ impl SimpleFilterEvaluator { _ => return None, }; + let literal = rhs.to_scalar().ok()?; Some(Self { column_name: lhs.name.clone(), - literal: rhs.clone().to_scalar(), + literal, op, }) } @@ -85,7 +86,10 @@ impl SimpleFilterEvaluator { } pub fn evaluate_scalar(&self, input: &ScalarValue) -> Result { - let result = self.evaluate_datum(&input.to_scalar())?; + let input = input + .to_scalar() + .with_context(|_| ToArrowScalarSnafu { v: input.clone() })?; + let result = self.evaluate_datum(&input)?; Ok(result.value(0)) } diff --git a/src/common/recordbatch/src/lib.rs b/src/common/recordbatch/src/lib.rs index f062a6474f..ff3135d9e4 100644 --- a/src/common/recordbatch/src/lib.rs +++ b/src/common/recordbatch/src/lib.rs @@ -37,6 +37,10 @@ pub use recordbatch::RecordBatch; use snafu::{ensure, ResultExt}; pub trait RecordBatchStream: Stream> { + fn name(&self) -> &str { + "RecordBatchStream" + } + fn schema(&self) -> SchemaRef; fn output_ordering(&self) -> Option<&[OrderOption]>; @@ -243,6 +247,10 @@ impl RecordBatchStreamWrapper { impl> + Unpin> RecordBatchStream for RecordBatchStreamWrapper { + fn name(&self) -> &str { + "RecordBatchStreamWrapper" + } + fn schema(&self) -> SchemaRef { self.schema.clone() } diff --git a/src/common/substrait/Cargo.toml b/src/common/substrait/Cargo.toml index 9ac4fc150f..a2fb0e2725 100644 --- a/src/common/substrait/Cargo.toml +++ b/src/common/substrait/Cargo.toml @@ -12,7 +12,9 @@ async-trait.workspace = true bytes.workspace = true catalog.workspace = true common-error.workspace = true +common-function.workspace = true common-macro.workspace = true +common-telemetry.workspace = true datafusion.workspace = true datafusion-common.workspace = true datafusion-expr.workspace = true @@ -20,6 +22,7 @@ datafusion-substrait.workspace = true datatypes.workspace = true promql.workspace = true prost.workspace = true +session.workspace = true snafu.workspace = true [dependencies.substrait_proto] diff --git a/src/common/substrait/src/df_substrait.rs b/src/common/substrait/src/df_substrait.rs index c4e1db9a56..cfc2736829 100644 --- a/src/common/substrait/src/df_substrait.rs +++ b/src/common/substrait/src/df_substrait.rs @@ -16,18 +16,24 @@ use std::sync::Arc; use async_trait::async_trait; use bytes::{Buf, Bytes, BytesMut}; -use datafusion::catalog::CatalogList; +use common_function::function_registry::FUNCTION_REGISTRY; +use common_function::scalars::udf::create_udf; +use datafusion::catalog::CatalogProviderList; use datafusion::execution::context::SessionState; use datafusion::execution::runtime_env::RuntimeEnv; +use datafusion::execution::FunctionRegistry; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_expr::LogicalPlan; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; use datafusion_substrait::logical_plan::producer::to_substrait_plan; +use datafusion_substrait::substrait::proto::Plan; use prost::Message; +use session::context::QueryContextRef; use snafu::ResultExt; -use substrait_proto::proto::Plan; -use crate::error::{DecodeDfPlanSnafu, DecodeRelSnafu, EncodeDfPlanSnafu, EncodeRelSnafu, Error}; +use crate::error::{ + DFInternalSnafu, DecodeDfPlanSnafu, DecodeRelSnafu, EncodeDfPlanSnafu, EncodeRelSnafu, Error, +}; use crate::extension_serializer::ExtensionSerializer; use crate::SubstraitPlan; @@ -42,17 +48,20 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor { async fn decode( &self, message: B, - catalog_list: Arc, - catalog: &str, - schema: &str, + catalog_list: Arc, 
+ mut state: SessionState, + query_ctx: QueryContextRef, ) -> Result { - let state_config = SessionConfig::new().with_default_catalog_and_schema(catalog, schema); - let state = SessionState::new_with_config_rt(state_config, Arc::new(RuntimeEnv::default())) - .with_serializer_registry(Arc::new(ExtensionSerializer)); + // substrait decoder will look up the UDFs in SessionState, so we need to register them + for func in FUNCTION_REGISTRY.functions() { + let udf = Arc::new(create_udf(func, query_ctx.clone(), Default::default()).into()); + state.register_udf(udf).context(DFInternalSnafu)?; + } + let mut context = SessionContext::new_with_state(state); context.register_catalog_list(catalog_list); let plan = Plan::decode(message).context(DecodeRelSnafu)?; - let df_plan = from_substrait_plan(&mut context, &plan) + let df_plan = from_substrait_plan(&context, &plan) .await .context(DecodeDfPlanSnafu)?; Ok(df_plan) diff --git a/src/common/substrait/src/lib.rs b/src/common/substrait/src/lib.rs index 51f8119dbb..ca7e28e8d1 100644 --- a/src/common/substrait/src/lib.rs +++ b/src/common/substrait/src/lib.rs @@ -21,7 +21,9 @@ use std::sync::Arc; use async_trait::async_trait; use bytes::{Buf, Bytes}; -use datafusion::catalog::CatalogList; +use datafusion::catalog::CatalogProviderList; +use datafusion::execution::context::SessionState; +use session::context::QueryContextRef; pub use substrait_proto; pub use crate::df_substrait::DFLogicalSubstraitConvertor; @@ -35,9 +37,9 @@ pub trait SubstraitPlan { async fn decode( &self, message: B, - catalog_list: Arc, - catalog: &str, - schema: &str, + catalog_list: Arc, + state: SessionState, + query_ctx: QueryContextRef, ) -> Result; fn encode(&self, plan: &Self::Plan) -> Result; diff --git a/src/common/time/src/datetime.rs b/src/common/time/src/datetime.rs index d12b87f7f1..f1980a38d1 100644 --- a/src/common/time/src/datetime.rs +++ b/src/common/time/src/datetime.rs @@ -35,11 +35,11 @@ pub struct DateTime(i64); impl Display for DateTime { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if let Some(abs_time) = NaiveDateTime::from_timestamp_millis(self.0) { + if let Some(abs_time) = chrono::DateTime::from_timestamp_millis(self.0) { write!( f, "{}", - format_utc_datetime(&abs_time, DATETIME_FORMAT_WITH_TZ) + format_utc_datetime(&abs_time.naive_utc(), DATETIME_FORMAT_WITH_TZ) ) } else { write!(f, "DateTime({})", self.0) @@ -55,7 +55,7 @@ impl From for serde_json::Value { impl From for DateTime { fn from(value: NaiveDateTime) -> Self { - DateTime::from(value.timestamp_millis()) + DateTime::from(value.and_utc().timestamp_millis()) } } @@ -87,13 +87,15 @@ impl DateTime { pub fn from_str(s: &str, timezone: Option<&Timezone>) -> Result { let s = s.trim(); let timestamp_millis = if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(s) { - dt.naive_utc().timestamp_millis() + dt.naive_utc().and_utc().timestamp_millis() } else if let Ok(d) = NaiveDateTime::parse_from_str(s, DATETIME_FORMAT) { match datetime_to_utc(&d, get_timezone(timezone)) { LocalResult::None => { return InvalidDateStrSnafu { raw: s }.fail(); } - LocalResult::Single(utc) | LocalResult::Ambiguous(utc, _) => utc.timestamp_millis(), + LocalResult::Single(t) | LocalResult::Ambiguous(t, _) => { + t.and_utc().timestamp_millis() + } } } else if let Ok(v) = chrono::DateTime::parse_from_str(s, DATETIME_FORMAT_WITH_TZ) { v.timestamp_millis() @@ -116,7 +118,7 @@ impl DateTime { /// Convert to [NaiveDateTime]. 
pub fn to_chrono_datetime(&self) -> Option { - NaiveDateTime::from_timestamp_millis(self.0) + chrono::DateTime::from_timestamp_millis(self.0).map(|x| x.naive_utc()) } /// Format DateTime for given format and timezone. diff --git a/src/common/time/src/timestamp.rs b/src/common/time/src/timestamp.rs index 7010ff8293..95dba04adc 100644 --- a/src/common/time/src/timestamp.rs +++ b/src/common/time/src/timestamp.rs @@ -357,7 +357,7 @@ impl Timestamp { pub fn to_chrono_datetime(&self) -> Option { let (sec, nsec) = self.split(); - NaiveDateTime::from_timestamp_opt(sec, nsec) + chrono::DateTime::from_timestamp(sec, nsec).map(|x| x.naive_utc()) } pub fn to_chrono_datetime_with_timezone(&self, tz: Option<&Timezone>) -> Option { @@ -380,7 +380,7 @@ impl Timestamp { } pub fn from_chrono_datetime(ndt: NaiveDateTime) -> Option { - let sec = ndt.timestamp(); + let sec = ndt.and_utc().timestamp(); let nsec = ndt.timestamp_subsec_nanos(); Timestamp::from_splits(sec, nsec) } @@ -1063,8 +1063,8 @@ mod tests { let _ = Timestamp::new(i64::MAX, TimeUnit::Nanosecond).split(); let _ = Timestamp::new(i64::MIN, TimeUnit::Nanosecond).split(); let (sec, nsec) = Timestamp::new(i64::MIN, TimeUnit::Nanosecond).split(); - let time = NaiveDateTime::from_timestamp_opt(sec, nsec).unwrap(); - assert_eq!(sec, time.timestamp()); + let time = DateTime::from_timestamp(sec, nsec).unwrap().naive_utc(); + assert_eq!(sec, time.and_utc().timestamp()); assert_eq!(nsec, time.timestamp_subsec_nanos()); } @@ -1159,12 +1159,12 @@ mod tests { #[test] fn test_subtract_timestamp() { assert_eq!( - Some(chrono::Duration::milliseconds(42)), + chrono::Duration::try_milliseconds(42), Timestamp::new_millisecond(100).sub(&Timestamp::new_millisecond(58)) ); assert_eq!( - Some(chrono::Duration::milliseconds(-42)), + chrono::Duration::try_milliseconds(-42), Timestamp::new_millisecond(58).sub(&Timestamp::new_millisecond(100)) ); } @@ -1286,8 +1286,8 @@ mod tests { #[test] fn test_from_naive_date_time() { - let naive_date_time_min = NaiveDateTime::MIN; - let naive_date_time_max = NaiveDateTime::MAX; + let naive_date_time_min = NaiveDateTime::MIN.and_utc(); + let naive_date_time_max = NaiveDateTime::MAX.and_utc(); let min_sec = Timestamp::new_second(naive_date_time_min.timestamp()); let max_sec = Timestamp::new_second(naive_date_time_max.timestamp()); diff --git a/src/common/time/src/timezone.rs b/src/common/time/src/timezone.rs index 8d98b072a9..b1fefceb21 100644 --- a/src/common/time/src/timezone.rs +++ b/src/common/time/src/timezone.rs @@ -15,7 +15,7 @@ use std::fmt::Display; use std::str::FromStr; -use chrono::{FixedOffset, NaiveDateTime, TimeZone}; +use chrono::{FixedOffset, TimeZone}; use chrono_tz::{OffsetComponents, Tz}; use once_cell::sync::OnceCell; use snafu::{OptionExt, ResultExt}; @@ -114,7 +114,9 @@ impl Timezone { match self { Self::Offset(offset) => offset.local_minus_utc().into(), Self::Named(tz) => { - let datetime = NaiveDateTime::from_timestamp_opt(0, 0).unwrap(); + let datetime = chrono::DateTime::from_timestamp(0, 0) + .map(|x| x.naive_utc()) + .expect("invalid timestamp"); let datetime = tz.from_utc_datetime(&datetime); let utc_offset = datetime.offset().base_utc_offset(); let dst_offset = datetime.offset().dst_offset(); diff --git a/src/common/time/src/util.rs b/src/common/time/src/util.rs index 6ce824764a..19fe3bc911 100644 --- a/src/common/time/src/util.rs +++ b/src/common/time/src/util.rs @@ -69,7 +69,10 @@ pub fn current_time_rfc3339() -> String { /// Returns the yesterday time in rfc3339 format. 
pub fn yesterday_rfc3339() -> String { let now = chrono::Utc::now(); - let day_before = now - chrono::Duration::days(1); + let day_before = now + - chrono::Duration::try_days(1).unwrap_or_else(|| { + panic!("now time ('{now}') is too early to calculate the day before") + }); day_before.to_rfc3339() } diff --git a/src/datanode/src/region_server.rs b/src/datanode/src/region_server.rs index 14ebe621a3..1af0fda462 100644 --- a/src/datanode/src/region_server.rs +++ b/src/datanode/src/region_server.rs @@ -36,7 +36,7 @@ use common_telemetry::tracing_context::{FutureExt, TracingContext}; use common_telemetry::{info, warn}; use dashmap::DashMap; use datafusion::catalog::schema::SchemaProvider; -use datafusion::catalog::{CatalogList, CatalogProvider}; +use datafusion::catalog::{CatalogProvider, CatalogProviderList}; use datafusion::datasource::TableProvider; use datafusion::error::Result as DfResult; use datafusion::execution::context::SessionState; @@ -643,10 +643,15 @@ impl RegionServerInner { .await?; let catalog_list = Arc::new(DummyCatalogList::with_table_provider(table_provider)); - + let query_engine_ctx = self.query_engine.engine_context(ctx.clone()); // decode substrait plan to logical plan and execute it let logical_plan = DFLogicalSubstraitConvertor - .decode(Bytes::from(plan), catalog_list, "", "") + .decode( + Bytes::from(plan), + catalog_list, + query_engine_ctx.state().clone(), + ctx.clone(), + ) .await .context(DecodeLogicalPlanSnafu)?; @@ -728,7 +733,7 @@ impl DummyCatalogList { } } -impl CatalogList for DummyCatalogList { +impl CatalogProviderList for DummyCatalogList { fn as_any(&self) -> &dyn Any { self } @@ -786,8 +791,8 @@ impl SchemaProvider for DummySchemaProvider { vec![] } - async fn table(&self, _name: &str) -> Option> { - Some(self.table.clone()) + async fn table(&self, _name: &str) -> DfResult>> { + Ok(Some(self.table.clone())) } fn table_exist(&self, _name: &str) -> bool { @@ -827,7 +832,10 @@ impl TableProvider for DummyTableProvider { limit: Option, ) -> DfResult> { let mut request = self.scan_request.lock().unwrap().clone(); - request.projection = projection.cloned(); + request.projection = match projection { + Some(x) if !x.is_empty() => Some(x.clone()), + _ => None, + }; request.filters = filters.iter().map(|e| Expr::from(e.clone())).collect(); request.limit = limit; diff --git a/src/datatypes/src/error.rs b/src/datatypes/src/error.rs index 316b50e327..df56be87b4 100644 --- a/src/datatypes/src/error.rs +++ b/src/datatypes/src/error.rs @@ -139,6 +139,13 @@ pub enum Error { error: arrow::error::ArrowError, location: Location, }, + + #[snafu(display("Failed to convert Arrow array to scalars"))] + ConvertArrowArrayToScalars { + #[snafu(source)] + error: datafusion_common::DataFusionError, + location: Location, + }, } impl ErrorExt for Error { diff --git a/src/datatypes/src/scalars.rs b/src/datatypes/src/scalars.rs index 60e6da2183..272d91eebf 100644 --- a/src/datatypes/src/scalars.rs +++ b/src/datatypes/src/scalars.rs @@ -437,10 +437,8 @@ mod tests { #[test] fn test_list_value_scalar() { - let list_value = ListValue::new( - Some(Box::new(vec![Value::Int32(123)])), - ConcreteDataType::int32_datatype(), - ); + let list_value = + ListValue::new(vec![Value::Int32(123)], ConcreteDataType::int32_datatype()); let list_ref = ListValueRef::Ref { val: &list_value }; assert_eq!(list_ref, list_value.as_scalar_ref()); assert_eq!(list_value, list_ref.to_owned_scalar()); diff --git a/src/datatypes/src/types/list_type.rs b/src/datatypes/src/types/list_type.rs index 
18115837f5..91f4c8654f 100644 --- a/src/datatypes/src/types/list_type.rs +++ b/src/datatypes/src/types/list_type.rs @@ -61,7 +61,7 @@ impl DataType for ListType { } fn default_value(&self) -> Value { - Value::List(ListValue::new(None, *self.item_type.clone())) + Value::List(ListValue::new(vec![], *self.item_type.clone())) } fn as_arrow_type(&self) -> ArrowDataType { @@ -95,7 +95,7 @@ mod tests { assert_eq!("List", t.name()); assert_eq!(LogicalTypeId::List, t.logical_type_id()); assert_eq!( - Value::List(ListValue::new(None, ConcreteDataType::boolean_datatype())), + Value::List(ListValue::new(vec![], ConcreteDataType::boolean_datatype())), t.default_value() ); assert_eq!( diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 1483b82adc..01c1983f32 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -17,6 +17,7 @@ use std::fmt::{Display, Formatter}; use std::sync::Arc; use arrow::datatypes::{DataType as ArrowDataType, Field}; +use arrow_array::{Array, ListArray}; use common_base::bytes::{Bytes, StringBytes}; use common_decimal::Decimal128; use common_telemetry::logging; @@ -31,8 +32,7 @@ pub use ordered_float::OrderedFloat; use serde::{Deserialize, Serialize}; use snafu::{ensure, ResultExt}; -use crate::error; -use crate::error::{Error, Result, TryFromValueSnafu}; +use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Error, Result, TryFromValueSnafu}; use crate::prelude::*; use crate::type_id::LogicalTypeId; use crate::types::{IntervalType, ListType}; @@ -110,9 +110,8 @@ impl Display for Value { Value::Interval(v) => write!(f, "{}", v.to_iso8601_string()), Value::Duration(d) => write!(f, "{d}"), Value::List(v) => { - let default = Box::>::default(); - let items = v.items().as_ref().unwrap_or(&default); - let items = items + let items = v + .items() .iter() .map(|i| i.to_string()) .collect::>() @@ -424,9 +423,10 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result ScalarValue::IntervalDayTime(None), IntervalType::MonthDayNano(_) => ScalarValue::IntervalMonthDayNano(None), }, - ConcreteDataType::List(_) => { - ScalarValue::List(None, Arc::new(new_item_field(output_type.as_arrow_type()))) - } + ConcreteDataType::List(_) => ScalarValue::List(Arc::new(ListArray::new_null( + Arc::new(new_item_field(output_type.as_arrow_type())), + 0, + ))), ConcreteDataType::Dictionary(dict) => ScalarValue::Dictionary( Box::new(dict.key_type().as_arrow_type()), Box::new(to_null_scalar_value(dict.value_type())?), @@ -715,9 +715,7 @@ impl TryFrom for serde_json::Value { /// List value. #[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize)] pub struct ListValue { - /// List of nested Values (boxed to reduce size_of(Value)) - #[allow(clippy::box_collection)] - items: Option>>, + items: Vec, /// Inner values datatype, to distinguish empty lists of different datatypes. /// Restricted by DataFusion, cannot use null datatype for empty list. 
datatype: ConcreteDataType, @@ -726,11 +724,11 @@ pub struct ListValue { impl Eq for ListValue {} impl ListValue { - pub fn new(items: Option>>, datatype: ConcreteDataType) -> Self { + pub fn new(items: Vec, datatype: ConcreteDataType) -> Self { Self { items, datatype } } - pub fn items(&self) -> &Option>> { + pub fn items(&self) -> &[Value] { &self.items } @@ -739,38 +737,30 @@ impl ListValue { } fn try_to_scalar_value(&self, output_type: &ListType) -> Result { - let vs = if let Some(items) = self.items() { - Some( - items - .iter() - .map(|v| v.try_to_scalar_value(output_type.item_type())) - .collect::>>()?, - ) - } else { - None - }; - - Ok(ScalarValue::List( - vs, - Arc::new(new_item_field(output_type.item_type().as_arrow_type())), - )) + let vs = self + .items + .iter() + .map(|v| v.try_to_scalar_value(output_type.item_type())) + .collect::>>()?; + Ok(ScalarValue::List(ScalarValue::new_list( + &vs, + &self.datatype.as_arrow_type(), + ))) } /// use 'the first item size' * 'length of items' to estimate the size. /// it could be inaccurate. fn estimated_size(&self) -> usize { - if let Some(items) = &self.items { - if let Some(item) = items.first() { - return item.as_value_ref().data_size() * items.len(); - } - } - 0 + self.items + .first() + .map(|x| x.as_value_ref().data_size() * self.items.len()) + .unwrap_or(0) } } impl Default for ListValue { fn default() -> ListValue { - ListValue::new(None, ConcreteDataType::null_datatype()) + ListValue::new(vec![], ConcreteDataType::null_datatype()) } } @@ -824,17 +814,14 @@ impl TryFrom for Value { ScalarValue::Binary(b) | ScalarValue::LargeBinary(b) | ScalarValue::FixedSizeBinary(_, b) => Value::from(b.map(Bytes::from)), - ScalarValue::List(vs, field) | ScalarValue::Fixedsizelist(vs, field, _) => { - let items = if let Some(vs) = vs { - let vs = vs - .into_iter() - .map(ScalarValue::try_into) - .collect::>()?; - Some(Box::new(vs)) - } else { - None - }; - let datatype = ConcreteDataType::try_from(field.data_type())?; + ScalarValue::List(array) => { + let datatype = ConcreteDataType::try_from(array.data_type())?; + let items = ScalarValue::convert_array_to_scalar_vec(array.as_ref()) + .context(ConvertArrowArrayToScalarsSnafu)? 
+ .into_iter() + .flatten() + .map(|x| x.try_into()) + .collect::>>()?; Value::List(ListValue::new(items, datatype)) } ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null), @@ -891,8 +878,11 @@ impl TryFrom for Value { .map(|v| Value::Decimal128(Decimal128::new(v, p, s))) .unwrap_or(Value::Null), ScalarValue::Decimal256(_, _, _) - | ScalarValue::Struct(_, _) - | ScalarValue::Dictionary(_, _) => { + | ScalarValue::Struct(_) + | ScalarValue::FixedSizeList(_) + | ScalarValue::LargeList(_) + | ScalarValue::Dictionary(_, _) + | ScalarValue::Union(_, _, _) => { return error::UnsupportedArrowTypeSnafu { arrow_type: v.data_type(), } @@ -1382,19 +1372,22 @@ mod tests { assert_eq!( Value::List(ListValue::new( - Some(Box::new(vec![Value::Int32(1), Value::Null])), - ConcreteDataType::int32_datatype() + vec![Value::Int32(1), Value::Null], + ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()) )), - ScalarValue::new_list( - Some(vec![ScalarValue::Int32(Some(1)), ScalarValue::Int32(None)]), - ArrowDataType::Int32, - ) + ScalarValue::List(ScalarValue::new_list( + &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(None)], + &ArrowDataType::Int32, + )) .try_into() .unwrap() ); assert_eq!( - Value::List(ListValue::new(None, ConcreteDataType::uint32_datatype())), - ScalarValue::new_list(None, ArrowDataType::UInt32) + Value::List(ListValue::new( + vec![], + ConcreteDataType::list_datatype(ConcreteDataType::uint32_datatype()) + )), + ScalarValue::List(ScalarValue::new_list(&[], &ArrowDataType::UInt32)) .try_into() .unwrap() ); @@ -1664,7 +1657,7 @@ mod tests { check_type_and_value( &ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()), &Value::List(ListValue::new( - Some(Box::new(vec![Value::Int32(10)])), + vec![Value::Int32(10)], ConcreteDataType::int32_datatype(), )), ); @@ -1829,7 +1822,7 @@ mod tests { assert_eq!( json_value, to_json(Value::List(ListValue { - items: Some(Box::new(vec![Value::Int32(123)])), + items: vec![Value::Int32(123)], datatype: ConcreteDataType::int32_datatype(), })) ); @@ -1897,7 +1890,7 @@ mod tests { check_as_value_ref!(DateTime, DateTime::new(1034)); let list = ListValue { - items: None, + items: vec![], datatype: ConcreteDataType::int32_datatype(), }; assert_eq!( @@ -1935,7 +1928,7 @@ mod tests { check_as_correct!(Time::new_second(12), Time, as_time); check_as_correct!(Duration::new_second(12), Duration, as_duration); let list = ListValue { - items: None, + items: vec![], datatype: ConcreteDataType::int32_datatype(), }; check_as_correct!(ListValueRef::Ref { val: &list }, List, as_list); @@ -1991,7 +1984,7 @@ mod tests { ); assert_eq!( Value::List(ListValue::new( - Some(Box::new(vec![Value::Int8(1), Value::Int8(2)])), + vec![Value::Int8(1), Value::Int8(2)], ConcreteDataType::int8_datatype(), )) .to_string(), @@ -1999,7 +1992,7 @@ mod tests { ); assert_eq!( Value::List(ListValue::new( - Some(Box::default()), + vec![], ConcreteDataType::timestamp_second_datatype(), )) .to_string(), @@ -2007,7 +2000,7 @@ mod tests { ); assert_eq!( Value::List(ListValue::new( - Some(Box::default()), + vec![], ConcreteDataType::timestamp_millisecond_datatype(), )) .to_string(), @@ -2015,7 +2008,7 @@ mod tests { ); assert_eq!( Value::List(ListValue::new( - Some(Box::default()), + vec![], ConcreteDataType::timestamp_microsecond_datatype(), )) .to_string(), @@ -2023,7 +2016,7 @@ mod tests { ); assert_eq!( Value::List(ListValue::new( - Some(Box::default()), + vec![], ConcreteDataType::timestamp_nanosecond_datatype(), )) .to_string(), @@ -2253,19 
+2246,29 @@ mod tests { #[test] fn test_list_value_to_scalar_value() { - let items = Some(Box::new(vec![Value::Int32(-1), Value::Null])); + let items = vec![Value::Int32(-1), Value::Null]; let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype())); let df_list = list .try_to_scalar_value(&ConcreteDataType::list_datatype( ConcreteDataType::int32_datatype(), )) .unwrap(); - assert!(matches!(df_list, ScalarValue::List(_, _))); + assert!(matches!(df_list, ScalarValue::List(_))); match df_list { - ScalarValue::List(vs, field) => { - assert_eq!(ArrowDataType::Int32, *field.data_type()); + ScalarValue::List(vs) => { + assert_eq!( + ArrowDataType::List(Arc::new(Field::new_list_field( + ArrowDataType::Int32, + true + ))), + *vs.data_type() + ); - let vs = vs.unwrap(); + let vs = ScalarValue::convert_array_to_scalar_vec(vs.as_ref()) + .unwrap() + .into_iter() + .flatten() + .collect::>(); assert_eq!( vs, vec![ScalarValue::Int32(Some(-1)), ScalarValue::Int32(None)] @@ -2367,10 +2370,10 @@ mod tests { check_value_ref_size_eq( &ValueRef::List(ListValueRef::Ref { val: &ListValue { - items: Some(Box::new(vec![ + items: vec![ Value::String("hello world".into()), Value::String("greptimedb".into()), - ])), + ], datatype: ConcreteDataType::string_datatype(), }, }), @@ -2387,7 +2390,6 @@ mod tests { for vec_opt in &data { if let Some(vec) = vec_opt { let values = vec.iter().map(|v| Value::from(*v)).collect(); - let values = Some(Box::new(values)); let list_value = ListValue::new(values, ConcreteDataType::int32_datatype()); builder.push(Some(ListValueRef::Ref { val: &list_value })); diff --git a/src/datatypes/src/vectors/constant.rs b/src/datatypes/src/vectors/constant.rs index 2d731ee993..66587cf1d7 100644 --- a/src/datatypes/src/vectors/constant.rs +++ b/src/datatypes/src/vectors/constant.rs @@ -223,7 +223,7 @@ mod tests { assert_eq!(10, c.len()); assert!(c.validity().is_all_valid()); assert!(!c.only_null()); - assert_eq!(64, c.memory_size()); + assert_eq!(4, c.memory_size()); for i in 0..10 { assert!(!c.is_null(i)); diff --git a/src/datatypes/src/vectors/helper.rs b/src/datatypes/src/vectors/helper.rs index ef7dab842f..21c37ec077 100644 --- a/src/datatypes/src/vectors/helper.rs +++ b/src/datatypes/src/vectors/helper.rs @@ -26,9 +26,9 @@ use datafusion_common::ScalarValue; use snafu::{OptionExt, ResultExt}; use crate::data_type::ConcreteDataType; -use crate::error::{self, Result}; +use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result}; use crate::scalars::{Scalar, ScalarVectorBuilder}; -use crate::value::{ListValue, ListValueRef}; +use crate::value::{ListValue, ListValueRef, Value}; use crate::vectors::{ BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Decimal128Vector, DurationMicrosecondVector, DurationMillisecondVector, DurationNanosecondVector, @@ -160,19 +160,18 @@ impl Helper { | ScalarValue::FixedSizeBinary(_, v) => { ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length) } - ScalarValue::List(v, field) | ScalarValue::Fixedsizelist(v, field, _) => { - let item_type = ConcreteDataType::try_from(field.data_type())?; + ScalarValue::List(array) => { + let item_type = ConcreteDataType::try_from(&array.value_type())?; let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1); - if let Some(values) = v { - let values = values - .into_iter() - .map(ScalarValue::try_into) - .collect::>()?; - let list_value = ListValue::new(Some(Box::new(values)), item_type); - builder.push(Some(ListValueRef::Ref { val: &list_value })); - 
} else { - builder.push(None); - } + let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref()) + .context(ConvertArrowArrayToScalarsSnafu)? + .into_iter() + .flatten() + .map(ScalarValue::try_into) + .collect::>>()?; + builder.push(Some(ListValueRef::Ref { + val: &ListValue::new(values, item_type), + })); let list_vector = builder.to_vector(); ConstantVector::new(list_vector, length) } @@ -236,8 +235,11 @@ impl Helper { ConstantVector::new(Arc::new(vector), length) } ScalarValue::Decimal256(_, _, _) - | ScalarValue::Struct(_, _) - | ScalarValue::Dictionary(_, _) => { + | ScalarValue::Struct(_) + | ScalarValue::FixedSizeList(_) + | ScalarValue::LargeList(_) + | ScalarValue::Dictionary(_, _) + | ScalarValue::Union(_, _, _) => { return error::ConversionSnafu { from: format!("Unsupported scalar value: {value}"), } @@ -351,7 +353,11 @@ impl Helper { | ArrowDataType::Dictionary(_, _) | ArrowDataType::Decimal256(_, _) | ArrowDataType::Map(_, _) - | ArrowDataType::RunEndEncoded(_, _) => { + | ArrowDataType::RunEndEncoded(_, _) + | ArrowDataType::BinaryView + | ArrowDataType::Utf8View + | ArrowDataType::ListView(_) + | ArrowDataType::LargeListView(_) => { return error::UnsupportedArrowTypeSnafu { arrow_type: array.as_ref().data_type().clone(), } @@ -396,7 +402,7 @@ mod tests { TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; - use arrow::datatypes::{Field, Int32Type}; + use arrow::datatypes::Int32Type; use arrow_array::DictionaryArray; use common_decimal::Decimal128; use common_time::time::Time; @@ -486,13 +492,10 @@ mod tests { #[test] fn test_try_from_list_value() { - let value = ScalarValue::List( - Some(vec![ - ScalarValue::Int32(Some(1)), - ScalarValue::Int32(Some(2)), - ]), - Arc::new(Field::new("item", ArrowDataType::Int32, true)), - ); + let value = ScalarValue::List(ScalarValue::new_list( + &[ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))], + &ArrowDataType::Int32, + )); let vector = Helper::try_from_scalar_value(value, 3).unwrap(); assert_eq!( ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()), @@ -501,8 +504,8 @@ mod tests { assert_eq!(3, vector.len()); for i in 0..vector.len() { let v = vector.get(i); - let items = v.as_list().unwrap().unwrap().items().as_ref().unwrap(); - assert_eq!(vec![Value::Int32(1), Value::Int32(2)], **items); + let items = v.as_list().unwrap().unwrap().items(); + assert_eq!(vec![Value::Int32(1), Value::Int32(2)], items); } } diff --git a/src/datatypes/src/vectors/list.rs b/src/datatypes/src/vectors/list.rs index aaf88bbd98..99399edcf4 100644 --- a/src/datatypes/src/vectors/list.rs +++ b/src/datatypes/src/vectors/list.rs @@ -114,10 +114,7 @@ impl Vector for ListVector { let values = (0..vector.len()) .map(|i| vector.get(i)) .collect::>(); - Value::List(ListValue::new( - Some(Box::new(values)), - self.item_type.clone(), - )) + Value::List(ListValue::new(values, self.item_type.clone())) } fn get_ref(&self, index: usize) -> ValueRef { @@ -248,11 +245,8 @@ impl ListVectorBuilder { } fn push_list_value(&mut self, list_value: &ListValue) -> Result<()> { - if let Some(items) = list_value.items() { - for item in &**items { - self.values_builder - .try_push_value_ref(item.as_value_ref())?; - } + for v in list_value.items() { + self.values_builder.try_push_value_ref(v.as_value_ref())?; } self.finish_list(true); @@ -496,7 +490,6 @@ pub mod tests { for vec_opt in data { if let Some(vec) = vec_opt { let values = vec.iter().map(|v| 
Value::from(*v)).collect(); - let values = Some(Box::new(values)); let list_value = ListValue::new(values, ConcreteDataType::int32_datatype()); builder.push(Some(ListValueRef::Ref { val: &list_value })); @@ -576,11 +569,7 @@ pub mod tests { assert_eq!( Value::List(ListValue::new( - Some(Box::new(vec![ - Value::Int32(1), - Value::Int32(2), - Value::Int32(3) - ])), + vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)], ConcreteDataType::int32_datatype() )), list_vector.get(0) @@ -599,11 +588,7 @@ pub mod tests { assert_eq!(Value::Null, list_vector.get(1)); assert_eq!( Value::List(ListValue::new( - Some(Box::new(vec![ - Value::Int32(4), - Value::Null, - Value::Int32(6) - ])), + vec![Value::Int32(4), Value::Null, Value::Int32(6)], ConcreteDataType::int32_datatype() )), list_vector.get(2) @@ -680,11 +665,7 @@ pub mod tests { ListType::new(ConcreteDataType::int32_datatype()).create_mutable_vector(3); builder.push_value_ref(ValueRef::List(ListValueRef::Ref { val: &ListValue::new( - Some(Box::new(vec![ - Value::Int32(4), - Value::Null, - Value::Int32(6), - ])), + vec![Value::Int32(4), Value::Null, Value::Int32(6)], ConcreteDataType::int32_datatype(), ), })); @@ -717,11 +698,7 @@ pub mod tests { builder.push(None); builder.push(Some(ListValueRef::Ref { val: &ListValue::new( - Some(Box::new(vec![ - Value::Int32(4), - Value::Null, - Value::Int32(6), - ])), + vec![Value::Int32(4), Value::Null, Value::Int32(6)], ConcreteDataType::int32_datatype(), ), })); @@ -772,11 +749,7 @@ pub mod tests { builder.push(None); builder.push(Some(ListValueRef::Ref { val: &ListValue::new( - Some(Box::new(vec![ - Value::Int32(4), - Value::Null, - Value::Int32(6), - ])), + vec![Value::Int32(4), Value::Null, Value::Int32(6)], ConcreteDataType::int32_datatype(), ), })); diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs index 7e7e3dd50b..fc0007a4b6 100644 --- a/src/datatypes/src/vectors/primitive.rs +++ b/src/datatypes/src/vectors/primitive.rs @@ -531,9 +531,9 @@ mod tests { #[test] fn test_memory_size() { let v = Int32Vector::from_slice((0..5).collect::>()); - assert_eq!(64, v.memory_size()); + assert_eq!(20, v.memory_size()); let v = Int64Vector::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]); - assert_eq!(128, v.memory_size()); + assert_eq!(144, v.memory_size()); } #[test] diff --git a/src/file-engine/Cargo.toml b/src/file-engine/Cargo.toml index ea24b07b77..f9cd1113f5 100644 --- a/src/file-engine/Cargo.toml +++ b/src/file-engine/Cargo.toml @@ -25,6 +25,7 @@ common-telemetry.workspace = true common-test-util = { workspace = true, optional = true } common-time.workspace = true datafusion.workspace = true +datafusion-expr.workspace = true datatypes.workspace = true futures.workspace = true object-store.workspace = true diff --git a/src/file-engine/src/query/file_stream.rs b/src/file-engine/src/query/file_stream.rs index 105785979e..f3afd1fbc5 100644 --- a/src/file-engine/src/query/file_stream.rs +++ b/src/file-engine/src/query/file_stream.rs @@ -23,15 +23,16 @@ use common_query::prelude::Expr; use common_query::DfPhysicalPlan; use common_recordbatch::adapter::RecordBatchStreamAdapter; use common_recordbatch::SendableRecordBatchStream; -use datafusion::common::ToDFSchema; +use datafusion::common::{Statistics, ToDFSchema}; +use datafusion::config::TableParquetOptions; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, 
ParquetExec}; -use datafusion::optimizer::utils::conjunction; use datafusion::physical_expr::create_physical_expr; use datafusion::physical_expr::execution_props::ExecutionProps; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::prelude::SessionContext; +use datafusion_expr::utils::conjunction; use datatypes::arrow::datatypes::Schema as ArrowSchema; use datatypes::schema::SchemaRef; use object_store::ObjectStore; @@ -101,6 +102,7 @@ fn build_record_batch_stream( projection: Option<&Vec>, limit: Option, ) -> Result { + let statistics = Statistics::new_unknown(file_schema.as_ref()); let stream = FileStream::new( &FileScanConfig { object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used @@ -109,12 +111,11 @@ fn build_record_batch_stream( .iter() .map(|filename| PartitionedFile::new(filename.to_string(), 0)) .collect::>()], - statistics: Default::default(), + statistics, projection: projection.cloned(), limit, table_partition_cols: vec![], output_ordering: vec![], - infinite_source: false, }, 0, // partition: hard-code opener, @@ -173,12 +174,11 @@ fn new_parquet_stream_with_exec_plan( .iter() .map(|filename| PartitionedFile::new(filename.to_string(), 0)) .collect::>()], - statistics: Default::default(), + statistics: Statistics::new_unknown(file_schema.as_ref()), projection: projection.cloned(), limit: *limit, table_partition_cols: vec![], output_ordering: vec![], - infinite_source: false, }; // build predicate filter @@ -192,7 +192,7 @@ fn new_parquet_stream_with_exec_plan( .to_dfschema_ref() .context(error::ParquetScanPlanSnafu)?; - let filters = create_physical_expr(&expr, &df_schema, &file_schema, &ExecutionProps::new()) + let filters = create_physical_expr(&expr, &df_schema, &ExecutionProps::new()) .context(error::ParquetScanPlanSnafu)?; Some(filters) } else { @@ -201,7 +201,7 @@ fn new_parquet_stream_with_exec_plan( // TODO(ruihang): get this from upper layer let task_ctx = SessionContext::default().task_ctx(); - let parquet_exec = ParquetExec::new(scan_config, filters, None) + let parquet_exec = ParquetExec::new(scan_config, filters, None, TableParquetOptions::default()) .with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new( store.clone(), ))); diff --git a/src/flow/clippy.toml b/src/flow/clippy.toml index 5a9ebd2a5b..87e7f07bc8 100644 --- a/src/flow/clippy.toml +++ b/src/flow/clippy.toml @@ -1,3 +1,5 @@ # Whether to only check for missing documentation in items visible within the current crate. For example, pub(crate) items. 
(default: false) # This is a config for clippy::missing_docs_in_private_items missing-docs-in-crate-items = true + +too-many-lines-threshold = 500 diff --git a/src/flow/src/transform/expr.rs b/src/flow/src/transform/expr.rs index 3f65c4b607..c8bff7da5c 100644 --- a/src/flow/src/transform/expr.rs +++ b/src/flow/src/transform/expr.rs @@ -33,6 +33,28 @@ use crate::repr::{ColumnType, RelationType}; use crate::transform::literal::{from_substrait_literal, from_substrait_type}; use crate::transform::FunctionExtensions; +// TODO: found proper place for this +/// ref to `arrow_schema::datatype` for type name +fn typename_to_cdt(name: &str) -> CDT { + match name { + "Int8" => CDT::int8_datatype(), + "Int16" => CDT::int16_datatype(), + "Int32" => CDT::int32_datatype(), + "Int64" => CDT::int64_datatype(), + "UInt8" => CDT::uint8_datatype(), + "UInt16" => CDT::uint16_datatype(), + "UInt32" => CDT::uint32_datatype(), + "UInt64" => CDT::uint64_datatype(), + "Float32" => CDT::float32_datatype(), + "Float64" => CDT::float64_datatype(), + "Boolean" => CDT::boolean_datatype(), + "String" => CDT::string_datatype(), + "Date" => CDT::date_datatype(), + "Timestamp" => CDT::timestamp_second_datatype(), + _ => CDT::null_datatype(), + } +} + impl TypedExpr { /// Convert ScalarFunction into Flow's ScalarExpr pub fn from_substrait_scalar_func( @@ -87,6 +109,21 @@ impl TypedExpr { Ok(TypedExpr::new(arg.call_unary(func), ret_type)) } + 2 if fn_name == "arrow_cast" => { + let cast_to = arg_exprs[1] + .clone() + .as_literal() + .and_then(|lit| lit.as_string()) + .with_context(|| InvalidQuerySnafu { + reason: "array_cast's second argument must be a literal string", + })?; + let cast_to = typename_to_cdt(&cast_to); + let func = UnaryFunc::Cast(cast_to); + let arg = arg_exprs[0].clone(); + let ret_type = ColumnType::new_nullable(func.signature().output.clone()); + + Ok(TypedExpr::new(arg.call_unary(func), ret_type)) + } // because variadic function can also have 2 arguments, we need to check if it's a variadic function first 2 if VariadicFunc::from_str_and_types(fn_name, &arg_types).is_err() => { let (func, signature) = diff --git a/src/meta-srv/src/service/admin.rs b/src/meta-srv/src/service/admin.rs index 6e38fc22fc..3cd1a22938 100644 --- a/src/meta-srv/src/service/admin.rs +++ b/src/meta-srv/src/service/admin.rs @@ -29,7 +29,7 @@ use std::task::{Context, Poll}; use tonic::body::BoxBody; use tonic::codegen::{empty_body, http, BoxFuture, Service}; -use tonic::transport::NamedService; +use tonic::server::NamedService; use crate::metasrv::Metasrv; diff --git a/src/mito2/src/cache/cache_size.rs b/src/mito2/src/cache/cache_size.rs index 8ecd2d5e99..3d79bcbe58 100644 --- a/src/mito2/src/cache/cache_size.rs +++ b/src/mito2/src/cache/cache_size.rs @@ -137,6 +137,6 @@ mod tests { fn test_parquet_meta_size() { let metadata = parquet_meta(); - assert_eq!(948, parquet_meta_size(&metadata)); + assert_eq!(956, parquet_meta_size(&metadata)); } } diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs index dfbf22c4b0..64c13778e3 100644 --- a/src/mito2/src/engine/basic_test.rs +++ b/src/mito2/src/engine/basic_test.rs @@ -550,7 +550,7 @@ async fn test_region_usage() { flush_region(&engine, region_id, None).await; let region_stat = region.region_usage().await; - assert_eq!(region_stat.sst_usage, 2962); + assert_eq!(region_stat.sst_usage, 3010); // region total usage // Some memtables may share items. 
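// --- Reviewer sketch (not part of the patch) --------------------------------
// The `arrow_cast` branch added to src/flow/src/transform/expr.rs above takes
// its second argument as a literal type-name string, resolves it with
// `typename_to_cdt`, and lowers the call to a plain `UnaryFunc::Cast`.
// A minimal illustration of the helper's mapping, assuming it sits next to
// `typename_to_cdt` in that module; the module and test names below are
// hypothetical, and only `typename_to_cdt` and the `CDT` alias come from the
// diff itself.
#[cfg(test)]
mod arrow_cast_sketch {
    use super::*;

    #[test]
    fn typename_lookup() {
        // Known Arrow type names map to the corresponding ConcreteDataType.
        assert_eq!(typename_to_cdt("Int64"), CDT::int64_datatype());
        assert_eq!(typename_to_cdt("Boolean"), CDT::boolean_datatype());
        // Unrecognized names fall back to the null datatype instead of erroring.
        assert_eq!(typename_to_cdt("SomethingElse"), CDT::null_datatype());
    }
}
// -----------------------------------------------------------------------------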
diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index 03d0e60715..201717fc7e 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -685,7 +685,9 @@ impl ErrorExt for Error { CleanDir { .. } => StatusCode::Unexpected, InvalidConfig { .. } => StatusCode::InvalidArguments, StaleLogEntry { .. } => StatusCode::Unexpected, + FilterRecordBatch { source, .. } => source.status_code(), + Upload { .. } => StatusCode::StorageUnavailable, BiError { .. } => StatusCode::Internal, EncodeMemtable { .. } | ReadDataPart { .. } => StatusCode::Internal, diff --git a/src/mito2/src/memtable/partition_tree/tree.rs b/src/mito2/src/memtable/partition_tree/tree.rs index ca0b478c87..3f3dd236c4 100644 --- a/src/mito2/src/memtable/partition_tree/tree.rs +++ b/src/mito2/src/memtable/partition_tree/tree.rs @@ -211,8 +211,9 @@ impl PartitionTree { }; let filters = predicate - .map(|p| { - p.exprs() + .map(|predicate| { + predicate + .exprs() .iter() .filter_map(|f| SimpleFilterEvaluator::try_new(f.df_expr())) .collect::>() diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index 3991504e51..fe93882681 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -238,7 +238,9 @@ impl Memtable for TimeSeriesMemtable { .collect() }; - let iter = self.series_set.iter_series(projection, filters, self.dedup); + let iter = self + .series_set + .iter_series(projection, filters, self.dedup)?; Ok(Box::new(iter)) } @@ -348,7 +350,7 @@ impl SeriesSet { projection: HashSet, predicate: Option, dedup: bool, - ) -> Iter { + ) -> Result { let primary_key_schema = primary_key_schema(&self.region_metadata); let primary_key_datatypes = self .region_metadata @@ -356,7 +358,7 @@ impl SeriesSet { .map(|pk| pk.column_schema.data_type.clone()) .collect(); - Iter::new( + Iter::try_new( self.region_metadata.clone(), self.series.clone(), projection, @@ -417,7 +419,7 @@ struct Iter { impl Iter { #[allow(clippy::too_many_arguments)] - pub(crate) fn new( + pub(crate) fn try_new( metadata: RegionMetadataRef, series: Arc, projection: HashSet, @@ -426,27 +428,28 @@ impl Iter { pk_datatypes: Vec, codec: Arc, dedup: bool, - ) -> Self { - let simple_filters = predicate - .map(|p| { - p.exprs() + ) -> Result { + let predicate = predicate + .map(|predicate| { + predicate + .exprs() .iter() .filter_map(|f| SimpleFilterEvaluator::try_new(f.df_expr())) .collect::>() }) .unwrap_or_default(); - Self { + Ok(Self { metadata, series, projection, last_key: None, - predicate: simple_filters, + predicate, pk_schema, pk_datatypes, codec, dedup, metrics: Metrics::default(), - } + }) } } diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 60aa0afa54..71996eba5b 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -180,11 +180,12 @@ impl ParquetReaderBuilder { metrics.build_cost = start.elapsed(); - let predicate = if let Some(p) = &self.predicate { - p.exprs() + let predicate = if let Some(predicate) = &self.predicate { + predicate + .exprs() .iter() .filter_map(|expr| SimpleFilterEvaluator::try_new(expr.df_expr())) - .collect() + .collect::>() } else { vec![] }; diff --git a/src/mito2/src/sst/parquet/stats.rs b/src/mito2/src/sst/parquet/stats.rs index a17e8ace45..43e0e3cfb9 100644 --- a/src/mito2/src/sst/parquet/stats.rs +++ b/src/mito2/src/sst/parquet/stats.rs @@ -18,8 +18,8 @@ use std::borrow::Borrow; use std::collections::HashSet; use 
datafusion::physical_optimizer::pruning::PruningStatistics; -use datafusion_common::Column; -use datatypes::arrow::array::ArrayRef; +use datafusion_common::{Column, ScalarValue}; +use datatypes::arrow::array::{ArrayRef, BooleanArray}; use parquet::file::metadata::RowGroupMetaData; use store_api::storage::ColumnId; @@ -81,4 +81,14 @@ impl<'a, T: Borrow> PruningStatistics for RowGroupPruningStats let column_id = self.column_id_to_prune(&column.name)?; self.read_format.null_counts(self.row_groups, column_id) } + + fn row_counts(&self, _column: &Column) -> Option { + // TODO(LFC): Impl it. + None + } + + fn contained(&self, _column: &Column, _values: &HashSet) -> Option { + // TODO(LFC): Impl it. + None + } } diff --git a/src/operator/src/delete.rs b/src/operator/src/delete.rs index 60e44cb7c6..46f235123c 100644 --- a/src/operator/src/delete.rs +++ b/src/operator/src/delete.rs @@ -124,6 +124,7 @@ impl Deleter { let request_factory = RegionRequestFactory::new(RegionRequestHeader { tracing_context: TracingContext::from_current_span().to_w3c(), dbname: ctx.get_db_string(), + ..Default::default() }); let tasks = self diff --git a/src/operator/src/expr_factory.rs b/src/operator/src/expr_factory.rs index 0975f9b17d..42d8ee3187 100644 --- a/src/operator/src/expr_factory.rs +++ b/src/operator/src/expr_factory.rs @@ -224,10 +224,15 @@ fn find_primary_keys( let columns_pk = columns .iter() .filter_map(|x| { - if x.options - .iter() - .any(|o| matches!(o.option, ColumnOption::Unique { is_primary: true })) - { + if x.options.iter().any(|o| { + matches!( + o.option, + ColumnOption::Unique { + is_primary: true, + .. + } + ) + }) { Some(x.name.value.clone()) } else { None @@ -249,6 +254,7 @@ fn find_primary_keys( name: _, columns, is_primary: true, + .. } => Some(columns.iter().map(|ident| ident.value.clone())), _ => None, }) @@ -276,6 +282,7 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result { name: Some(name), columns, is_primary: false, + .. 
} => { if name.value == TIME_INDEX { Some(columns.iter().map(|ident| &ident.value)) diff --git a/src/operator/src/insert.rs b/src/operator/src/insert.rs index 5460fe9a59..61d2d2cfcf 100644 --- a/src/operator/src/insert.rs +++ b/src/operator/src/insert.rs @@ -196,6 +196,7 @@ impl Inserter { let request_factory = RegionRequestFactory::new(RegionRequestHeader { tracing_context: TracingContext::from_current_span().to_w3c(), dbname: ctx.get_db_string(), + ..Default::default() }); let tasks = self diff --git a/src/operator/src/request.rs b/src/operator/src/request.rs index 3aba4dfd70..7cfd5a8a1a 100644 --- a/src/operator/src/request.rs +++ b/src/operator/src/request.rs @@ -162,6 +162,7 @@ impl Requester { let request_factory = RegionRequestFactory::new(RegionRequestHeader { tracing_context: TracingContext::from_current_span().to_w3c(), dbname: db_string.unwrap_or_else(|| ctx.get_db_string()), + ..Default::default() }); let tasks = requests.into_iter().map(|req_body| { diff --git a/src/operator/src/statement/copy_table_from.rs b/src/operator/src/statement/copy_table_from.rs index c8c1ae3688..cb922742ff 100644 --- a/src/operator/src/statement/copy_table_from.rs +++ b/src/operator/src/statement/copy_table_from.rs @@ -36,6 +36,7 @@ use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStre use datafusion::parquet::arrow::arrow_reader::ArrowReaderMetadata; use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; +use datafusion_common::Statistics; use datatypes::arrow::compute::can_cast_types; use datatypes::arrow::datatypes::{Schema, SchemaRef}; use datatypes::vectors::Helper; @@ -184,17 +185,17 @@ impl StatementExecutor { filename: &str, file_schema: SchemaRef, ) -> Result { + let statistics = Statistics::new_unknown(file_schema.as_ref()); let stream = FileStream::new( &FileScanConfig { object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used file_schema, file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]], - statistics: Default::default(), + statistics, projection: None, limit: None, table_partition_cols: vec![], output_ordering: vec![], - infinite_source: false, }, 0, opener, diff --git a/src/operator/src/statement/copy_table_to.rs b/src/operator/src/statement/copy_table_to.rs index 2a4d4a0ca7..282bb03979 100644 --- a/src/operator/src/statement/copy_table_to.rs +++ b/src/operator/src/statement/copy_table_to.rs @@ -118,7 +118,7 @@ impl StatementExecutor { let table_source = Arc::new(DefaultTableSource::new(table_provider)); let mut builder = LogicalPlanBuilder::scan_with_filters( - df_table_ref.to_owned_reference(), + df_table_ref, table_source, None, filters.clone(), diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index 9e7bc01ae8..893b8b24c3 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -20,9 +20,12 @@ common-query.workspace = true common-recordbatch.workspace = true common-telemetry.workspace = true datafusion.workspace = true +datafusion-expr.workspace = true +datafusion-functions.workspace = true datatypes.workspace = true futures = "0.3" greptime-proto.workspace = true +itertools.workspace = true lazy_static.workspace = true prometheus.workspace = true promql-parser = "0.1.1" diff --git a/src/promql/src/extension_plan/empty_metric.rs b/src/promql/src/extension_plan/empty_metric.rs index 74d25d8ebb..f26337b2f6 100644 --- a/src/promql/src/extension_plan/empty_metric.rs +++ b/src/promql/src/extension_plan/empty_metric.rs @@ 
-20,16 +20,20 @@ use std::sync::Arc; use std::task::{Context, Poll}; use datafusion::arrow::array::ArrayRef; -use datafusion::arrow::datatypes::{DataType, Schema as ArrowSchema, TimeUnit}; -use datafusion::common::{DFField, DFSchema, DFSchemaRef, Result as DataFusionResult, Statistics}; +use datafusion::arrow::datatypes::{DataType, TimeUnit}; +use datafusion::common::arrow::datatypes::Field; +use datafusion::common::stats::Precision; +use datafusion::common::{ + DFSchema, DFSchemaRef, Result as DataFusionResult, Statistics, TableReference, +}; use datafusion::error::DataFusionError; use datafusion::execution::context::{SessionState, TaskContext}; use datafusion::logical_expr::{ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::{PhysicalExprRef, PhysicalSortExpr}; +use datafusion::physical_expr::{EquivalenceProperties, PhysicalExprRef}; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, }; use datafusion::physical_planner::PhysicalPlanner; use datafusion::prelude::{col, lit, Expr}; @@ -66,15 +70,14 @@ impl EmptyMetric { field_column_name: String, field_expr: Option, ) -> DataFusionResult { + let qualifier = Some(TableReference::bare("")); let ts_only_schema = build_ts_only_schema(&time_index_column_name); - let mut fields = vec![ts_only_schema.field(0).clone()]; + let mut fields = vec![(qualifier.clone(), Arc::new(ts_only_schema.field(0).clone()))]; if let Some(field_expr) = &field_expr { let field_data_type = field_expr.get_type(&ts_only_schema)?; - fields.push(DFField::new( - Some(""), - &field_column_name, - field_data_type, - true, + fields.push(( + qualifier.clone(), + Arc::new(Field::new(field_column_name, field_data_type, true)), )); } let schema = Arc::new(DFSchema::new_with_metadata(fields, HashMap::new())?); @@ -102,22 +105,23 @@ impl EmptyMetric { .expr .as_ref() .map(|expr| { - physical_planner.create_physical_expr( - expr, - &self.time_index_schema, - &ArrowSchema::from(self.time_index_schema.as_ref()), - session_state, - ) + physical_planner.create_physical_expr(expr, &self.time_index_schema, session_state) }) .transpose()?; - + let result_schema: SchemaRef = Arc::new(self.result_schema.as_ref().into()); + let properties = Arc::new(PlanProperties::new( + EquivalenceProperties::new(result_schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + )); Ok(Arc::new(EmptyMetricExec { start: self.start, end: self.end, interval: self.interval, time_index_schema: Arc::new(self.time_index_schema.as_ref().into()), - result_schema: Arc::new(self.result_schema.as_ref().into()), + result_schema, expr: physical_expr, + properties, metric: ExecutionPlanMetricsSet::new(), })) } @@ -137,7 +141,11 @@ impl UserDefinedLogicalNodeCore for EmptyMetric { } fn expressions(&self) -> Vec { - vec![] + if let Some(expr) = &self.expr { + vec![expr.clone()] + } else { + vec![] + } } fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { @@ -148,8 +156,19 @@ impl UserDefinedLogicalNodeCore for EmptyMetric { ) } - fn from_template(&self, _expr: &[Expr], _inputs: &[LogicalPlan]) -> Self { - self.clone() + fn from_template(&self, expr: &[Expr], _inputs: &[LogicalPlan]) -> Self { + Self { + start: 
self.start, + end: self.end, + interval: self.interval, + expr: if !expr.is_empty() { + Some(expr[0].clone()) + } else { + None + }, + time_index_schema: self.time_index_schema.clone(), + result_schema: self.result_schema.clone(), + } } } @@ -164,7 +183,7 @@ pub struct EmptyMetricExec { /// Schema of the output record batch result_schema: SchemaRef, expr: Option, - + properties: Arc, metric: ExecutionPlanMetricsSet, } @@ -177,12 +196,8 @@ impl ExecutionPlan for EmptyMetricExec { self.result_schema.clone() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None + fn properties(&self) -> &PlanProperties { + self.properties.as_ref() } fn maintains_input_order(&self) -> Vec { @@ -222,16 +237,15 @@ impl ExecutionPlan for EmptyMetricExec { Some(self.metric.clone_inner()) } - fn statistics(&self) -> Statistics { + fn statistics(&self) -> DataFusionResult { let estimated_row_num = (self.end - self.start) as f64 / self.interval as f64; let total_byte_size = estimated_row_num * std::mem::size_of::() as f64; - Statistics { - num_rows: Some(estimated_row_num.floor() as _), - total_byte_size: Some(total_byte_size.floor() as _), - column_statistics: None, - is_exact: true, - } + Ok(Statistics { + num_rows: Precision::Inexact(estimated_row_num.floor() as _), + total_byte_size: Precision::Inexact(total_byte_size.floor() as _), + column_statistics: Statistics::unknown_column(&self.schema()), + }) } } @@ -285,21 +299,21 @@ impl Stream for EmptyMetricStream { let num_rows = time_array.len(); let input_record_batch = RecordBatch::try_new(self.time_index_schema.clone(), vec![time_array.clone()]) - .map_err(DataFusionError::ArrowError)?; + .map_err(|e| DataFusionError::ArrowError(e, None))?; let mut result_arrays: Vec = vec![time_array]; // evaluate the field expr and get the result if let Some(field_expr) = &self.expr { result_arrays.push( field_expr - .evaluate(&input_record_batch)? 
- .into_array(num_rows), + .evaluate(&input_record_batch) + .and_then(|x| x.into_array(num_rows))?, ); } // assemble the output record batch let batch = RecordBatch::try_new(self.result_schema.clone(), result_arrays) - .map_err(DataFusionError::ArrowError); + .map_err(|e| DataFusionError::ArrowError(e, None)); Poll::Ready(Some(batch)) } else { @@ -311,14 +325,17 @@ impl Stream for EmptyMetricStream { /// Build a schema that only contains **millisecond** timestamp column fn build_ts_only_schema(column_name: &str) -> DFSchema { - let ts_field = DFField::new( - Some(""), + let ts_field = Field::new( column_name, DataType::Timestamp(TimeUnit::Millisecond, None), false, ); // safety: should not fail (UT covers this) - DFSchema::new_with_metadata(vec![ts_field], HashMap::new()).unwrap() + DFSchema::new_with_metadata( + vec![(Some(TableReference::bare("")), Arc::new(ts_field))], + HashMap::new(), + ) + .unwrap() } // Convert timestamp column to UNIX epoch second: diff --git a/src/promql/src/extension_plan/histogram_fold.rs b/src/promql/src/extension_plan/histogram_fold.rs index c5fabd4a0f..13315a22a0 100644 --- a/src/promql/src/extension_plan/histogram_fold.rs +++ b/src/promql/src/extension_plan/histogram_fold.rs @@ -24,16 +24,17 @@ use datafusion::arrow::array::AsArray; use datafusion::arrow::compute::{self, concat_batches, SortOptions}; use datafusion::arrow::datatypes::{DataType, Float64Type, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::common::{DFSchema, DFSchemaRef}; +use datafusion::common::stats::Precision; +use datafusion::common::{ColumnStatistics, DFSchema, DFSchemaRef, Statistics}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::TaskContext; use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; +use datafusion::physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; use datafusion::physical_plan::expressions::{CastExpr as PhyCast, Column as PhyColumn}; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr, - RecordBatchStream, SendableRecordBatchStream, Statistics, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; use datafusion::prelude::{Column, Expr}; use datatypes::prelude::{ConcreteDataType, DataType as GtDataType}; @@ -141,16 +142,13 @@ impl HistogramFold { ) -> DataFusionResult<()> { let check_column = |col| { if !input_schema.has_column_with_unqualified_name(col) { - return Err(DataFusionError::SchemaError( + Err(DataFusionError::SchemaError( datafusion::common::SchemaError::FieldNotFound { field: Box::new(Column::new(None::, col)), - valid_fields: input_schema - .fields() - .iter() - .map(|f| f.qualified_column()) - .collect(), + valid_fields: input_schema.columns(), }, - )); + Box::new(None), + )) } else { Ok(()) } @@ -166,25 +164,29 @@ impl HistogramFold { // safety: those fields are checked in `check_schema()` let le_column_index = input_schema .index_of_column_by_name(None, &self.le_column) - .unwrap() .unwrap(); let field_column_index = input_schema .index_of_column_by_name(None, &self.field_column) - .unwrap() .unwrap(); let ts_column_index = input_schema .index_of_column_by_name(None, &self.ts_column) - .unwrap() .unwrap(); + let output_schema: SchemaRef = Arc::new(self.output_schema.as_ref().into()); + let 
properties = PlanProperties::new( + EquivalenceProperties::new(output_schema.clone()), + Partitioning::UnknownPartitioning(1), + exec_input.properties().execution_mode(), + ); Arc::new(HistogramFoldExec { le_column_index, field_column_index, ts_column_index, input: exec_input, quantile: self.quantile.into(), - output_schema: Arc::new(self.output_schema.as_ref().into()), + output_schema, metric: ExecutionPlanMetricsSet::new(), + properties, }) } @@ -195,15 +197,16 @@ impl HistogramFold { input_schema: &DFSchemaRef, le_column: &str, ) -> DataFusionResult { - let mut fields = input_schema.fields().clone(); + let fields = input_schema.fields(); // safety: those fields are checked in `check_schema()` - let le_column_idx = input_schema - .index_of_column_by_name(None, le_column)? - .unwrap(); - fields.remove(le_column_idx); - + let mut new_fields = Vec::with_capacity(fields.len() - 1); + for f in fields { + if f.name() != le_column { + new_fields.push((None, f.clone())); + } + } Ok(Arc::new(DFSchema::new_with_metadata( - fields, + new_fields, HashMap::new(), )?)) } @@ -220,6 +223,7 @@ pub struct HistogramFoldExec { ts_column_index: usize, quantile: f64, metric: ExecutionPlanMetricsSet, + properties: PlanProperties, } impl ExecutionPlan for HistogramFoldExec { @@ -227,16 +231,8 @@ impl ExecutionPlan for HistogramFoldExec { self } - fn schema(&self) -> SchemaRef { - self.output_schema.clone() - } - - fn output_partitioning(&self) -> Partitioning { - self.input.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() + fn properties(&self) -> &PlanProperties { + &self.properties } fn required_input_ordering(&self) -> Vec>> { @@ -276,8 +272,7 @@ impl ExecutionPlan for HistogramFoldExec { } fn required_input_distribution(&self) -> Vec { - // partition on all tag columns, i.e., non-le, non-ts and non-field columns - vec![Distribution::HashPartitioned(self.tag_col_exprs())] + vec![Distribution::SinglePartition; self.children().len()] } fn maintains_input_order(&self) -> Vec { @@ -302,6 +297,7 @@ impl ExecutionPlan for HistogramFoldExec { quantile: self.quantile, output_schema: self.output_schema.clone(), field_column_index: self.field_column_index, + properties: self.properties.clone(), })) } @@ -343,13 +339,16 @@ impl ExecutionPlan for HistogramFoldExec { Some(self.metric.clone_inner()) } - fn statistics(&self) -> Statistics { - Statistics { - num_rows: None, - total_byte_size: None, - column_statistics: None, - is_exact: false, - } + fn statistics(&self) -> DataFusionResult { + Ok(Statistics { + num_rows: Precision::Absent, + total_byte_size: Precision::Absent, + column_statistics: vec![ + ColumnStatistics::new_unknown(); + // plus one more for the removed column by function `convert_schema` + self.schema().all_fields().len() + 1 + ], + }) } } @@ -594,7 +593,7 @@ impl HistogramFoldStream { self.output_buffered_rows = 0; RecordBatch::try_new(self.output_schema.clone(), columns) .map(Some) - .map_err(DataFusionError::ArrowError) + .map_err(|e| DataFusionError::ArrowError(e, None)) } /// Find the first `+Inf` which indicates the end of the bucket group @@ -695,6 +694,7 @@ mod test { use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::common::ToDFSchema; use datafusion::physical_plan::memory::MemoryExec; + use datafusion::physical_plan::ExecutionMode; use datafusion::prelude::SessionContext; use datatypes::arrow_array::StringArray; @@ -759,7 +759,7 @@ mod test { #[tokio::test] async fn fold_overall() { let memory_exec = 
Arc::new(prepare_test_data()); - let output_schema = Arc::new( + let output_schema: SchemaRef = Arc::new( (*HistogramFold::convert_schema( &Arc::new(memory_exec.schema().to_dfschema().unwrap()), "le", @@ -769,6 +769,11 @@ mod test { .clone() .into(), ); + let properties = PlanProperties::new( + EquivalenceProperties::new(output_schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); let fold_exec = Arc::new(HistogramFoldExec { le_column_index: 1, field_column_index: 2, @@ -777,6 +782,7 @@ mod test { input: memory_exec, output_schema, metric: ExecutionPlanMetricsSet::new(), + properties, }); let session_context = SessionContext::default(); diff --git a/src/promql/src/extension_plan/instant_manipulate.rs b/src/promql/src/extension_plan/instant_manipulate.rs index e65592bb37..03e2c373ee 100644 --- a/src/promql/src/extension_plan/instant_manipulate.rs +++ b/src/promql/src/extension_plan/instant_manipulate.rs @@ -21,14 +21,14 @@ use std::task::{Context, Poll}; use datafusion::arrow::array::{Array, Float64Array, TimestampMillisecondArray, UInt64Array}; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::common::{DFSchema, DFSchemaRef}; +use datafusion::common::stats::Precision; +use datafusion::common::{ColumnStatistics, DFSchema, DFSchemaRef}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::{EmptyRelation, Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use datatypes::arrow::compute; @@ -194,16 +194,17 @@ impl ExecutionPlan for InstantManipulateExec { self.input.schema() } - fn output_partitioning(&self) -> Partitioning { - self.input.output_partitioning() + fn properties(&self) -> &PlanProperties { + self.input.properties() } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() + fn required_input_distribution(&self) -> Vec { + self.input.required_input_distribution() } + // Prevent reordering of input fn maintains_input_order(&self) -> Vec { - vec![true; self.children().len()] + vec![false; self.children().len()] } fn children(&self) -> Vec> { @@ -262,23 +263,28 @@ impl ExecutionPlan for InstantManipulateExec { Some(self.metric.clone_inner()) } - fn statistics(&self) -> Statistics { - let input_stats = self.input.statistics(); + fn statistics(&self) -> DataFusionResult { + let input_stats = self.input.statistics()?; let estimated_row_num = (self.end - self.start) as f64 / self.interval as f64; let estimated_total_bytes = input_stats .total_byte_size - .zip(input_stats.num_rows) - .map(|(size, rows)| (size as f64 / rows as f64) * estimated_row_num) - .map(|size| size.floor() as _); + .get_value() + .zip(input_stats.num_rows.get_value()) + .map(|(size, rows)| { + Precision::Inexact(((*size as f64 / *rows as f64) * estimated_row_num).floor() as _) + }) + .unwrap_or(Precision::Absent); - Statistics { - num_rows: Some(estimated_row_num.floor() as _), + Ok(Statistics { + num_rows: Precision::Inexact(estimated_row_num.floor() as _), total_byte_size: estimated_total_bytes, // 
TODO(ruihang): support this column statistics - column_statistics: None, - is_exact: false, - } + column_statistics: vec![ + ColumnStatistics::new_unknown(); + self.schema().all_fields().len() + ], + }) } } @@ -438,7 +444,7 @@ impl InstantManipulateStream { arrays[self.time_index] = Arc::new(TimestampMillisecondArray::from(aligned_ts)); let result = RecordBatch::try_new(record_batch.schema(), arrays) - .map_err(DataFusionError::ArrowError)?; + .map_err(|e| DataFusionError::ArrowError(e, None))?; Ok(result) } } diff --git a/src/promql/src/extension_plan/normalize.rs b/src/promql/src/extension_plan/normalize.rs index 968e8a6d37..957c55fade 100644 --- a/src/promql/src/extension_plan/normalize.rs +++ b/src/promql/src/extension_plan/normalize.rs @@ -23,10 +23,9 @@ use datafusion::common::{DFSchema, DFSchemaRef, Result as DataFusionResult, Stat use datafusion::error::DataFusionError; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::{EmptyRelation, Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, RecordBatchStream, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; use datatypes::arrow::array::TimestampMillisecondArray; @@ -170,12 +169,8 @@ impl ExecutionPlan for SeriesNormalizeExec { vec![Distribution::SinglePartition] } - fn output_partitioning(&self) -> Partitioning { - self.input.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() + fn properties(&self) -> &PlanProperties { + self.input.properties() } fn children(&self) -> Vec> { @@ -223,7 +218,7 @@ impl ExecutionPlan for SeriesNormalizeExec { Some(self.metric.clone_inner()) } - fn statistics(&self) -> Statistics { + fn statistics(&self) -> DataFusionResult { self.input.statistics() } } @@ -299,7 +294,7 @@ impl SeriesNormalizeStream { } let result = compute::filter_record_batch(&ordered_batch, &BooleanArray::from(filter)) - .map_err(DataFusionError::ArrowError)?; + .map_err(|e| DataFusionError::ArrowError(e, None))?; Ok(result) } } diff --git a/src/promql/src/extension_plan/range_manipulate.rs b/src/promql/src/extension_plan/range_manipulate.rs index e6e93d8b50..49002dabfa 100644 --- a/src/promql/src/extension_plan/range_manipulate.rs +++ b/src/promql/src/extension_plan/range_manipulate.rs @@ -23,14 +23,15 @@ use datafusion::arrow::compute; use datafusion::arrow::datatypes::{Field, SchemaRef}; use datafusion::arrow::error::ArrowError; use datafusion::arrow::record_batch::RecordBatch; -use datafusion::common::{DFField, DFSchema, DFSchemaRef}; +use datafusion::common::stats::Precision; +use datafusion::common::{DFSchema, DFSchemaRef}; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::{EmptyRelation, Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_expr::EquivalenceProperties; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanProperties, 
RecordBatchStream, SendableRecordBatchStream, Statistics, }; use datafusion::sql::TableReference; @@ -110,44 +111,55 @@ impl RangeManipulate { time_index: &str, field_columns: &[String], ) -> DataFusionResult { - let mut columns = input_schema.fields().clone(); + let columns = input_schema.fields(); + let mut new_columns = Vec::with_capacity(columns.len() + 1); + for i in 0..columns.len() { + let x = input_schema.qualified_field(i); + new_columns.push((x.0.cloned(), Arc::new(x.1.clone()))); + } // process time index column // the raw timestamp field is preserved. And a new timestamp_range field is appended to the last. - let Some(ts_col_index) = input_schema.index_of_column_by_name(None, time_index)? else { + let Some(ts_col_index) = input_schema.index_of_column_by_name(None, time_index) else { return Err(datafusion::common::field_not_found( None::, time_index, input_schema.as_ref(), )); }; - let ts_col_field = columns[ts_col_index].field(); + let ts_col_field = &columns[ts_col_index]; let timestamp_range_field = Field::new( Self::build_timestamp_range_name(time_index), RangeArray::convert_field(ts_col_field).data_type().clone(), ts_col_field.is_nullable(), ); - columns.push(DFField::from(timestamp_range_field)); + new_columns.push((None, Arc::new(timestamp_range_field))); // process value columns for name in field_columns { - let Some(index) = input_schema.index_of_column_by_name(None, name)? else { + let Some(index) = input_schema.index_of_column_by_name(None, name) else { return Err(datafusion::common::field_not_found( None::, name, input_schema.as_ref(), )); }; - columns[index] = DFField::from(RangeArray::convert_field(columns[index].field())); + new_columns[index] = (None, Arc::new(RangeArray::convert_field(&columns[index]))); } Ok(Arc::new(DFSchema::new_with_metadata( - columns, + new_columns, HashMap::new(), )?)) } pub fn to_execution_plan(&self, exec_input: Arc) -> Arc { + let output_schema: SchemaRef = SchemaRef::new(self.output_schema.as_ref().into()); + let properties = PlanProperties::new( + EquivalenceProperties::new(output_schema.clone()), + exec_input.properties().partitioning.clone(), + exec_input.properties().execution_mode, + ); Arc::new(RangeManipulateExec { start: self.start, end: self.end, @@ -157,8 +169,9 @@ impl RangeManipulate { time_range_column: self.range_timestamp_name(), field_columns: self.field_columns.clone(), input: exec_input, - output_schema: SchemaRef::new(self.output_schema.as_ref().into()), + output_schema, metric: ExecutionPlanMetricsSet::new(), + properties, }) } @@ -247,6 +260,7 @@ pub struct RangeManipulateExec { input: Arc, output_schema: SchemaRef, metric: ExecutionPlanMetricsSet, + properties: PlanProperties, } impl ExecutionPlan for RangeManipulateExec { @@ -258,12 +272,8 @@ impl ExecutionPlan for RangeManipulateExec { self.output_schema.clone() } - fn output_partitioning(&self) -> Partitioning { - self.input.output_partitioning() - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() + fn properties(&self) -> &PlanProperties { + &self.properties } fn maintains_input_order(&self) -> Vec { @@ -274,6 +284,10 @@ impl ExecutionPlan for RangeManipulateExec { vec![self.input.clone()] } + fn required_input_distribution(&self) -> Vec { + vec![Distribution::SinglePartition] + } + fn with_new_children( self: Arc, children: Vec>, @@ -290,6 +304,7 @@ impl ExecutionPlan for RangeManipulateExec { output_schema: self.output_schema.clone(), input: children[0].clone(), metric: self.metric.clone(), + properties: 
self.properties.clone(), })) } @@ -333,23 +348,25 @@ impl ExecutionPlan for RangeManipulateExec { Some(self.metric.clone_inner()) } - fn statistics(&self) -> Statistics { - let input_stats = self.input.statistics(); + fn statistics(&self) -> DataFusionResult { + let input_stats = self.input.statistics()?; let estimated_row_num = (self.end - self.start) as f64 / self.interval as f64; let estimated_total_bytes = input_stats .total_byte_size - .zip(input_stats.num_rows) - .map(|(size, rows)| (size as f64 / rows as f64) * estimated_row_num) - .map(|size| size.floor() as _); + .get_value() + .zip(input_stats.num_rows.get_value()) + .map(|(size, rows)| { + Precision::Inexact(((*size as f64 / *rows as f64) * estimated_row_num).floor() as _) + }) + .unwrap_or_default(); - Statistics { - num_rows: Some(estimated_row_num.floor() as _), + Ok(Statistics { + num_rows: Precision::Inexact(estimated_row_num as _), total_byte_size: estimated_total_bytes, // TODO(ruihang): support this column statistics - column_statistics: None, - is_exact: false, - } + column_statistics: Statistics::unknown_column(&self.schema()), + }) } } @@ -452,7 +469,7 @@ impl RangeManipulateStream { RecordBatch::try_new(self.output_schema.clone(), new_columns) .map(Some) - .map_err(DataFusionError::ArrowError) + .map_err(|e| DataFusionError::ArrowError(e, None)) } fn calculate_range(&self, input: &RecordBatch) -> (ArrayRef, Vec<(u32, u32)>) { @@ -500,7 +517,9 @@ mod test { ArrowPrimitiveType, DataType, Field, Int64Type, Schema, TimestampMillisecondType, }; use datafusion::common::ToDFSchema; + use datafusion::physical_expr::Partitioning; use datafusion::physical_plan::memory::MemoryExec; + use datafusion::physical_plan::ExecutionMode; use datafusion::prelude::SessionContext; use datatypes::arrow::array::TimestampMillisecondArray; @@ -556,6 +575,11 @@ mod test { .as_ref() .into(), ); + let properties = PlanProperties::new( + EquivalenceProperties::new(manipulate_output_schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); let normalize_exec = Arc::new(RangeManipulateExec { start, end, @@ -567,6 +591,7 @@ mod test { time_index_column: time_index, input: memory_exec, metric: ExecutionPlanMetricsSet::new(), + properties, }); let session_context = SessionContext::default(); let result = datafusion::physical_plan::collect(normalize_exec, session_context.task_ctx()) diff --git a/src/promql/src/extension_plan/series_divide.rs b/src/promql/src/extension_plan/series_divide.rs index 4abf8d8788..d0524b5d53 100644 --- a/src/promql/src/extension_plan/series_divide.rs +++ b/src/promql/src/extension_plan/series_divide.rs @@ -24,12 +24,12 @@ use datafusion::common::{DFSchema, DFSchemaRef}; use datafusion::error::Result as DataFusionResult; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::{EmptyRelation, Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; +use datafusion::physical_expr::PhysicalSortRequirement; use datafusion::physical_plan::expressions::Column as ColumnExpr; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, }; use datatypes::arrow::compute; use futures::{ready, 
Stream, StreamExt}; @@ -130,8 +130,8 @@ impl ExecutionPlan for SeriesDivideExec { self.input.schema() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) + fn properties(&self) -> &PlanProperties { + self.input.properties() } fn required_input_distribution(&self) -> Vec { @@ -156,10 +156,6 @@ impl ExecutionPlan for SeriesDivideExec { } } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() - } - fn maintains_input_order(&self) -> Vec { vec![true; self.children().len()] } @@ -212,16 +208,6 @@ impl ExecutionPlan for SeriesDivideExec { fn metrics(&self) -> Option { Some(self.metric.clone_inner()) } - - fn statistics(&self) -> Statistics { - Statistics { - num_rows: None, - total_byte_size: None, - // TODO(ruihang): support this column statistics - column_statistics: None, - is_exact: false, - } - } } impl DisplayAs for SeriesDivideExec { diff --git a/src/promql/src/extension_plan/union_distinct_on.rs b/src/promql/src/extension_plan/union_distinct_on.rs index 20e40415c9..4624544645 100644 --- a/src/promql/src/extension_plan/union_distinct_on.rs +++ b/src/promql/src/extension_plan/union_distinct_on.rs @@ -25,11 +25,11 @@ use datafusion::common::DFSchemaRef; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::execution::context::TaskContext; use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion::physical_expr::PhysicalSortExpr; +use datafusion::physical_expr::EquivalenceProperties; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ - hash_utils, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - RecordBatchStream, SendableRecordBatchStream, Statistics, + hash_utils, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, + Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; use datatypes::arrow::compute; use futures::future::BoxFuture; @@ -91,13 +91,20 @@ impl UnionDistinctOn { left_exec: Arc, right_exec: Arc, ) -> Arc { + let output_schema: SchemaRef = Arc::new(self.output_schema.as_ref().into()); + let properties = Arc::new(PlanProperties::new( + EquivalenceProperties::new(output_schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + )); Arc::new(UnionDistinctOnExec { left: left_exec, right: right_exec, compare_keys: self.compare_keys.clone(), ts_col: self.ts_col.clone(), - output_schema: Arc::new(self.output_schema.as_ref().into()), + output_schema, metric: ExecutionPlanMetricsSet::new(), + properties, random_state: RandomState::new(), }) } @@ -151,6 +158,7 @@ pub struct UnionDistinctOnExec { ts_col: String, output_schema: SchemaRef, metric: ExecutionPlanMetricsSet, + properties: Arc, /// Shared the `RandomState` for the hashing algorithm random_state: RandomState, @@ -169,14 +177,8 @@ impl ExecutionPlan for UnionDistinctOnExec { vec![Distribution::SinglePartition, Distribution::SinglePartition] } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - /// [UnionDistinctOnExec] will output left first, then right. - /// So the order of the output is not maintained. 
- fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None + fn properties(&self) -> &PlanProperties { + self.properties.as_ref() } fn children(&self) -> Vec> { @@ -198,6 +200,7 @@ impl ExecutionPlan for UnionDistinctOnExec { ts_col: self.ts_col.clone(), output_schema: self.output_schema.clone(), metric: self.metric.clone(), + properties: self.properties.clone(), random_state: self.random_state.clone(), })) } @@ -249,10 +252,6 @@ impl ExecutionPlan for UnionDistinctOnExec { fn metrics(&self) -> Option { Some(self.metric.clone_inner()) } - - fn statistics(&self) -> Statistics { - Statistics::default() - } } impl DisplayAs for UnionDistinctOnExec { @@ -472,7 +471,8 @@ fn interleave_batches( } // assemble new record batch - RecordBatch::try_new(schema.clone(), interleaved_arrays).map_err(DataFusionError::ArrowError) + RecordBatch::try_new(schema.clone(), interleaved_arrays) + .map_err(|e| DataFusionError::ArrowError(e, None)) } /// Utility function to take rows from a record batch. Based on [take](datafusion::arrow::compute::take) @@ -490,9 +490,10 @@ fn take_batch(batch: &RecordBatch, indices: &[usize]) -> DataFusionResult, _>>() - .map_err(DataFusionError::ArrowError)?; + .map_err(|e| DataFusionError::ArrowError(e, None))?; - let result = RecordBatch::try_new(schema, arrays).map_err(DataFusionError::ArrowError)?; + let result = + RecordBatch::try_new(schema, arrays).map_err(|e| DataFusionError::ArrowError(e, None))?; Ok(result) } diff --git a/src/promql/src/functions/extrapolate_rate.rs b/src/promql/src/functions/extrapolate_rate.rs index 7cba367145..7a57efae04 100644 --- a/src/promql/src/functions/extrapolate_rate.rs +++ b/src/promql/src/functions/extrapolate_rate.rs @@ -204,15 +204,17 @@ impl ExtrapolatedRate { } pub fn scalar_udf(range_length: i64) -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. + #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(move |input| Self::new(range_length).calc(input)), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(move |input: &_| Self::new(range_length).calc(input)) as _), + ) } } @@ -223,15 +225,17 @@ impl ExtrapolatedRate { } pub fn scalar_udf(range_length: i64) -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. + #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(move |input| Self::new(range_length).calc(input)), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(move |input: &_| Self::new(range_length).calc(input)) as _), + ) } } @@ -242,15 +246,17 @@ impl ExtrapolatedRate { } pub fn scalar_udf(range_length: i64) -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. 
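// Illustrative sketch (not part of the patch): the TODO above points at the newer
// trait-based UDF API. Assuming `ScalarUDFImpl` + `ScalarUDF::new_from_impl` is what
// is meant, a parameterized PromQL-style function could eventually look roughly like
// this (struct name, UDF name, and types are invented for the example; the exact
// trait methods should be checked against the DataFusion version in use):
use std::any::Any;

use datafusion::arrow::datatypes::DataType;
use datafusion::error::Result as DataFusionResult;
use datafusion::logical_expr::{ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, Volatility};
use datafusion::scalar::ScalarValue;

#[derive(Debug)]
struct ExampleRate {
    range_length: i64,
    signature: Signature,
}

impl ScalarUDFImpl for ExampleRate {
    fn as_any(&self) -> &dyn Any {
        self
    }
    fn name(&self) -> &str {
        "example_rate"
    }
    fn signature(&self) -> &Signature {
        &self.signature
    }
    fn return_type(&self, _arg_types: &[DataType]) -> DataFusionResult<DataType> {
        Ok(DataType::Float64)
    }
    fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
        // A real implementation would evaluate the rate over `args` using
        // `self.range_length`; elided to keep the sketch short.
        let _ = (args, self.range_length);
        Ok(ColumnarValue::Scalar(ScalarValue::Float64(None)))
    }
}

fn example_udf(range_length: i64) -> ScalarUDF {
    ScalarUDF::new_from_impl(ExampleRate {
        range_length,
        signature: Signature::any(2, Volatility::Immutable),
    })
}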
+ #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(move |input| Self::new(range_length).calc(input)), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(move |input: &_| Self::new(range_length).calc(input)) as _), + ) } } diff --git a/src/promql/src/functions/holt_winters.rs b/src/promql/src/functions/holt_winters.rs index 4480ccc52d..c047c1883d 100644 --- a/src/promql/src/functions/holt_winters.rs +++ b/src/promql/src/functions/holt_winters.rs @@ -68,15 +68,17 @@ impl HoltWinters { } pub fn scalar_udf(level: f64, trend: f64) -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. + #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(move |input| Self::new(level, trend).calc(input)), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(move |input: &_| Self::new(level, trend).calc(input)) as _), + ) } fn calc(&self, input: &[ColumnarValue]) -> Result { diff --git a/src/promql/src/functions/idelta.rs b/src/promql/src/functions/idelta.rs index cf8ad2d0d6..9a74b65fec 100644 --- a/src/promql/src/functions/idelta.rs +++ b/src/promql/src/functions/idelta.rs @@ -42,15 +42,17 @@ impl IDelta { } pub fn scalar_udf() -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. + #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(Self::calc), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(Self::calc) as _), + ) } // time index column and value column diff --git a/src/promql/src/functions/predict_linear.rs b/src/promql/src/functions/predict_linear.rs index 965fa28afc..c9b24a76a8 100644 --- a/src/promql/src/functions/predict_linear.rs +++ b/src/promql/src/functions/predict_linear.rs @@ -44,15 +44,17 @@ impl PredictLinear { } pub fn scalar_udf(t: i64) -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. + #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(move |input| Self::new(t).calc(input)), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(move |input: &_| Self::new(t).calc(input)) as _), + ) } // time index column and value column diff --git a/src/promql/src/functions/quantile.rs b/src/promql/src/functions/quantile.rs index 62ff2f2126..d055ad1227 100644 --- a/src/promql/src/functions/quantile.rs +++ b/src/promql/src/functions/quantile.rs @@ -40,15 +40,17 @@ impl QuantileOverTime { } pub fn scalar_udf(quantile: f64) -> ScalarUDF { - ScalarUDF { - name: Self::name().to_string(), - signature: Signature::new( + // TODO(LFC): Use the new Datafusion UDF impl. 
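// Illustrative sketch (not part of the patch): until the TODOs above are resolved,
// every PromQL function goes through the deprecated positional
// `ScalarUDF::new(name, signature, return_type, fun)` constructor, with per-call
// parameters (range length, level/trend, quantile, ...) captured by the `move`
// closure. A stripped-down version of that pattern (function name and helper are
// invented for the example):
use std::sync::Arc;

use datafusion::arrow::datatypes::DataType;
use datafusion::logical_expr::{
    ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature,
    Volatility,
};
use datafusion::scalar::ScalarValue;

fn scale_udf(factor: f64) -> ScalarUDF {
    let return_type: ReturnTypeFunction =
        Arc::new(|_: &[DataType]| Ok(Arc::new(DataType::Float64)));
    // `factor` is moved into the closure, mirroring how `range_length`,
    // `level`/`trend`, and `quantile` are captured in the conversions above.
    let fun: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| {
        let _ = (args, factor); // real logic elided
        Ok(ColumnarValue::Scalar(ScalarValue::Float64(None)))
    });
    #[allow(deprecated)]
    ScalarUDF::new(
        "scale",
        &Signature::exact(vec![DataType::Float64], Volatility::Immutable),
        &return_type,
        &fun,
    )
}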
+ #[allow(deprecated)] + ScalarUDF::new( + Self::name(), + &Signature::new( TypeSignature::Exact(Self::input_type()), Volatility::Immutable, ), - return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))), - fun: Arc::new(move |input| Self::new(quantile).calc(input)), - } + &(Arc::new(|_: &_| Ok(Arc::new(Self::return_type()))) as _), + &(Arc::new(move |input: &_| Self::new(quantile).calc(input)) as _), + ) } // time index column and value column diff --git a/src/promql/src/functions/test_util.rs b/src/promql/src/functions/test_util.rs index 5b9d4adef3..8f9558e590 100644 --- a/src/promql/src/functions/test_util.rs +++ b/src/promql/src/functions/test_util.rs @@ -32,12 +32,20 @@ pub fn simple_range_udf_runner( ColumnarValue::Array(Arc::new(input_ts.into_dict())), ColumnarValue::Array(Arc::new(input_value.into_dict())), ]; - let eval_result: Vec> = extract_array(&(range_fn.fun)(&input).unwrap()) + let eval_result: Vec> = extract_array(&(range_fn.fun())(&input).unwrap()) .unwrap() .as_any() .downcast_ref::() .unwrap() .iter() .collect(); - assert_eq!(eval_result, expected) + assert_eq!(eval_result.len(), expected.len()); + assert!(eval_result + .iter() + .zip(expected.iter()) + .all(|(x, y)| match (*x, *y) { + (Some(x), Some(y)) => (x - y).abs() < 0.0001, + (None, None) => true, + _ => false, + })); } diff --git a/src/promql/src/planner.rs b/src/promql/src/planner.rs index 6cd0e17828..daa11025e4 100644 --- a/src/promql/src/planner.rs +++ b/src/promql/src/planner.rs @@ -20,20 +20,23 @@ use std::time::UNIX_EPOCH; use async_recursion::async_recursion; use catalog::table_source::DfTableSourceProvider; use common_query::prelude::GREPTIME_VALUE; -use datafusion::common::{DFSchemaRef, OwnedTableReference, Result as DfResult}; +use datafusion::common::{DFSchemaRef, Result as DfResult}; use datafusion::datasource::DefaultTableSource; -use datafusion::logical_expr::expr::{AggregateFunction, Alias, ScalarFunction, ScalarUDF}; +use datafusion::logical_expr::expr::{ + AggregateFunction, AggregateFunctionDefinition, Alias, ScalarFunction, +}; use datafusion::logical_expr::expr_rewriter::normalize_cols; use datafusion::logical_expr::{ AggregateFunction as AggregateFunctionEnum, BinaryExpr, BuiltinScalarFunction, Cast, Extension, - LogicalPlan, LogicalPlanBuilder, Operator, ScalarUDF as ScalarUdfDef, + LogicalPlan, LogicalPlanBuilder, Operator, ScalarFunctionDefinition, ScalarUDF as ScalarUdfDef, }; -use datafusion::optimizer::utils::{self, conjunction}; use datafusion::prelude as df_prelude; use datafusion::prelude::{Column, Expr as DfExpr, JoinType}; use datafusion::scalar::ScalarValue; use datafusion::sql::TableReference; +use datafusion_expr::utils::conjunction; use datatypes::arrow::datatypes::DataType as ArrowDataType; +use itertools::Itertools; use promql_parser::label::{MatchOp, Matcher, Matchers, METRIC_NAME}; use promql_parser::parser::{ token, AggregateExpr, BinModifier, BinaryExpr as PromBinaryExpr, Call, EvalStmt, @@ -305,14 +308,14 @@ impl PromPlanner { let left_field_columns = self.ctx.field_columns.clone(); let mut left_table_ref = self .table_ref() - .unwrap_or_else(|_| OwnedTableReference::bare("")); + .unwrap_or_else(|_| TableReference::bare("")); let left_context = self.ctx.clone(); let right_input = self.prom_expr_to_plan(*rhs.clone()).await?; let right_field_columns = self.ctx.field_columns.clone(); let mut right_table_ref = self .table_ref() - .unwrap_or_else(|_| OwnedTableReference::bare("")); + .unwrap_or_else(|_| TableReference::bare("")); let right_context = self.ctx.clone(); // 
TODO(ruihang): avoid join if left and right are the same table @@ -332,8 +335,8 @@ impl PromPlanner { // normal join if left_table_ref == right_table_ref { // rename table references to avoid ambiguity - left_table_ref = OwnedTableReference::bare("lhs"); - right_table_ref = OwnedTableReference::bare("rhs"); + left_table_ref = TableReference::bare("lhs"); + right_table_ref = TableReference::bare("rhs"); self.ctx.table_name = Some("lhs".to_string()); } let mut field_columns = @@ -349,13 +352,13 @@ impl PromPlanner { let bin_expr_builder = |_: &String| { let (left_col_name, right_col_name) = field_columns.next().unwrap(); let left_col = join_plan_schema - .field_with_name(Some(&left_table_ref), left_col_name) + .qualified_field_with_name(Some(&left_table_ref), left_col_name) .context(DataFusionPlanningSnafu)? - .qualified_column(); + .into(); let right_col = join_plan_schema - .field_with_name(Some(&right_table_ref), right_col_name) + .qualified_field_with_name(Some(&right_table_ref), right_col_name) .context(DataFusionPlanningSnafu)? - .qualified_column(); + .into(); let binary_expr_builder = Self::prom_token_to_binary_expr_builder(*op)?; let mut binary_expr = binary_expr_builder( @@ -859,7 +862,7 @@ impl PromPlanner { Ok(exprs) } - fn table_ref(&self) -> Result { + fn table_ref(&self) -> Result { let table_name = self .ctx .table_name @@ -868,12 +871,12 @@ impl PromPlanner { // set schema name if `__schema__` is given let table_ref = if let Some(schema_name) = &self.ctx.schema_name { - TableReference::partial(schema_name, &table_name) + TableReference::partial(schema_name.as_str(), table_name.as_str()) } else { - TableReference::bare(&table_name) + TableReference::bare(table_name.as_str()) }; - Ok(table_ref.to_owned_reference()) + Ok(table_ref) } /// Create a table scan plan and a filter plan with given filter. 
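// Illustrative sketch (not part of the patch): `OwnedTableReference` is gone and
// `TableReference` itself is now owned (no lifetime parameter), so it can be built
// from `&str` and returned directly; `ResolvedTableReference` is likewise built from
// plain strings, as in the test hunk further below. Roughly (values are placeholders):
use datafusion::common::ResolvedTableReference;
use datafusion::sql::TableReference;

fn table_ref(schema: Option<&str>, table: &str) -> TableReference {
    match schema {
        Some(schema) => TableReference::partial(schema, table),
        None => TableReference::bare(table),
    }
}

fn resolved_example() -> ResolvedTableReference {
    ResolvedTableReference {
        catalog: "greptime".into(),
        schema: "public".into(),
        table: "numbers".into(),
    }
}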
@@ -882,7 +885,7 @@ impl PromPlanner { /// If the filter is empty async fn create_table_scan_plan( &mut self, - table_ref: OwnedTableReference, + table_ref: TableReference, filter: Vec, ) -> Result { let provider = self @@ -1128,7 +1131,9 @@ impl PromPlanner { right: Box::new(interval_1day_lit_expr), }); let date_trunc_expr = DfExpr::ScalarFunction(ScalarFunction { - fun: BuiltinScalarFunction::DateTrunc, + func_def: ScalarFunctionDefinition::UDF( + datafusion_functions::datetime::date_trunc(), + ), args: vec![month_lit_expr, self.create_time_index_column_expr()?], }); let date_trunc_plus_interval_expr = DfExpr::BinaryExpr(BinaryExpr { @@ -1137,21 +1142,30 @@ impl PromPlanner { right: Box::new(the_1month_minus_1day_expr), }); let date_part_expr = DfExpr::ScalarFunction(ScalarFunction { - fun: BuiltinScalarFunction::DatePart, + func_def: ScalarFunctionDefinition::UDF( + datafusion_functions::datetime::date_part(), + ), args: vec![day_lit_expr, date_trunc_plus_interval_expr], }); exprs.push(date_part_expr); ScalarFunc::GeneratedExpr } - _ => ScalarFunc::DataFusionBuiltin( - BuiltinScalarFunction::from_str(func.name).map_err(|_| { - UnsupportedExprSnafu { + _ => { + if let Ok(f) = BuiltinScalarFunction::from_str(func.name) { + ScalarFunc::DataFusionBuiltin(f) + } else if let Some(f) = datafusion_functions::math::functions() + .iter() + .find(|f| f.name() == func.name) + { + ScalarFunc::DataFusionUdf(f.clone()) + } else { + return UnsupportedExprSnafu { name: func.name.to_string(), } - .build() - })?, - ), + .fail(); + } + } }; for value in &self.ctx.field_columns { @@ -1161,12 +1175,24 @@ impl PromPlanner { ScalarFunc::DataFusionBuiltin(fun) => { other_input_exprs.insert(field_column_pos, col_expr); let fn_expr = DfExpr::ScalarFunction(ScalarFunction { - fun, + func_def: ScalarFunctionDefinition::BuiltIn(fun), args: other_input_exprs.clone().into(), }); exprs.push(fn_expr); let _ = other_input_exprs.remove(field_column_pos); } + ScalarFunc::DataFusionUdf(f) => { + let args = itertools::chain!( + other_input_exprs.iter().take(field_column_pos).cloned(), + std::iter::once(col_expr), + other_input_exprs.iter().skip(field_column_pos).cloned() + ) + .collect_vec(); + exprs.push(DfExpr::ScalarFunction(ScalarFunction { + func_def: ScalarFunctionDefinition::UDF(f), + args, + })) + } ScalarFunc::Udf(fun) => { let ts_range_expr = DfExpr::Column(Column::from_name( RangeManipulate::build_timestamp_range_name( @@ -1175,8 +1201,8 @@ impl PromPlanner { )); other_input_exprs.insert(field_column_pos, ts_range_expr); other_input_exprs.insert(field_column_pos + 1, col_expr); - let fn_expr = DfExpr::ScalarUDF(ScalarUDF { - fun: Arc::new(fun), + let fn_expr = DfExpr::ScalarFunction(ScalarFunction { + func_def: ScalarFunctionDefinition::UDF(Arc::new(fun)), args: other_input_exprs.clone().into(), }); exprs.push(fn_expr); @@ -1193,8 +1219,8 @@ impl PromPlanner { other_input_exprs.insert(field_column_pos + 1, col_expr); other_input_exprs .insert(field_column_pos + 2, self.create_time_index_column_expr()?); - let fn_expr = DfExpr::ScalarUDF(ScalarUDF { - fun: Arc::new(fun), + let fn_expr = DfExpr::ScalarFunction(ScalarFunction { + func_def: ScalarFunctionDefinition::UDF(Arc::new(fun)), args: other_input_exprs.clone().into(), }); exprs.push(fn_expr); @@ -1258,7 +1284,7 @@ impl PromPlanner { exprs.push(expr); } - utils::conjunction(exprs).context(ValueNotFoundSnafu { + conjunction(exprs).context(ValueNotFoundSnafu { table: self.table_ref()?.to_quoted_string(), }) } @@ -1299,11 +1325,12 @@ impl PromPlanner { .iter() 
.map(|col| { DfExpr::AggregateFunction(AggregateFunction { - fun: aggr.clone(), + func_def: AggregateFunctionDefinition::BuiltIn(aggr.clone()), args: vec![DfExpr::Column(Column::from_name(col))], distinct: false, filter: None, order_by: None, + null_treatment: None, }) }) .collect(); @@ -1515,13 +1542,16 @@ impl PromPlanner { token::T_LTE => Ok(Box::new(|lhs, rhs| Ok(lhs.lt_eq(rhs)))), token::T_POW => Ok(Box::new(|lhs, rhs| { Ok(DfExpr::ScalarFunction(ScalarFunction { - fun: BuiltinScalarFunction::Power, + func_def: ScalarFunctionDefinition::UDF(datafusion_functions::math::power()), args: vec![lhs, rhs], })) })), token::T_ATAN2 => Ok(Box::new(|lhs, rhs| { Ok(DfExpr::ScalarFunction(ScalarFunction { - fun: BuiltinScalarFunction::Atan2, + // func_def: ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Atan2), + func_def: datafusion_expr::ScalarFunctionDefinition::UDF( + datafusion_functions::math::atan2(), + ), args: vec![lhs, rhs], })) })), @@ -1557,8 +1587,8 @@ impl PromPlanner { &self, left: LogicalPlan, right: LogicalPlan, - left_table_ref: OwnedTableReference, - right_table_ref: OwnedTableReference, + left_table_ref: TableReference, + right_table_ref: TableReference, ) -> Result { let mut tag_columns = self .ctx @@ -1778,8 +1808,8 @@ impl PromPlanner { .difference(&right_tag_cols_set) .cloned() .collect::>(); - let left_qualifier = left.schema().field(0).qualifier().cloned(); - let right_qualifier = right.schema().field(0).qualifier().cloned(); + let left_qualifier = left.schema().qualified_field(0).0.cloned(); + let right_qualifier = right.schema().qualified_field(0).0.cloned(); let left_qualifier_string = left_qualifier .as_ref() .map(|l| l.to_string()) @@ -1953,7 +1983,7 @@ impl PromPlanner { let field_columns_iter = result_field_columns .into_iter() .zip(self.ctx.field_columns.iter()) - .map(|(expr, name)| Ok(DfExpr::Alias(Alias::new(expr, name.to_string())))); + .map(|(expr, name)| Ok(DfExpr::Alias(Alias::new(expr, None::, name)))); // chain non-field columns (unchanged) and field columns (applied computation then alias) let project_fields = non_field_columns_iter @@ -2007,7 +2037,7 @@ impl PromPlanner { })?, ); let fn_expr = DfExpr::ScalarFunction(ScalarFunction { - fun: BuiltinScalarFunction::DatePart, + func_def: ScalarFunctionDefinition::UDF(datafusion_functions::datetime::date_part()), args: vec![lit_expr, input_expr], }); Ok(fn_expr) @@ -2023,6 +2053,8 @@ struct FunctionArgs { #[derive(Debug, Clone)] enum ScalarFunc { DataFusionBuiltin(BuiltinScalarFunction), + /// The UDF that is defined by Datafusion itself. + DataFusionUdf(Arc), Udf(ScalarUdfDef), // todo(ruihang): maybe merge with Udf later /// UDF that require extra information like range length to be evaluated. 
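// Illustrative sketch (not part of the patch): function expressions no longer carry a
// bare `fun` field, and the separate `Expr::ScalarUDF` / `Expr::AggregateUDF` variants
// are gone; scalar and aggregate calls now wrap a *definition* enum, and aggregate
// expressions gained a mandatory `null_treatment` field. For example (column names
// are placeholders):
use datafusion::logical_expr::expr::{
    AggregateFunction, AggregateFunctionDefinition, ScalarFunction,
};
use datafusion::logical_expr::{
    col, lit, AggregateFunction as AggregateFunctionEnum, Expr, ScalarFunctionDefinition,
};

fn example_exprs() -> (Expr, Expr) {
    // date_part('day', ts) through the UDF shipped in `datafusion-functions`
    let date_part = Expr::ScalarFunction(ScalarFunction {
        func_def: ScalarFunctionDefinition::UDF(datafusion_functions::datetime::date_part()),
        args: vec![lit("day"), col("ts")],
    });
    // sum(value) through the built-in aggregate enum
    let sum = Expr::AggregateFunction(AggregateFunction {
        func_def: AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::Sum),
        args: vec![col("value")],
        distinct: false,
        filter: None,
        order_by: None,
        null_treatment: None,
    });
    (date_part, sum)
}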
@@ -2436,7 +2468,7 @@ mod test { #[tokio::test] async fn aggregate_stdvar() { - do_aggregate_expr_plan("stdvar", "VARIANCE_POP").await; + do_aggregate_expr_plan("stdvar", "VAR_POP").await; } #[tokio::test] diff --git a/src/query/src/datafusion.rs b/src/query/src/datafusion.rs index b6f0e9543f..f646d6b90c 100644 --- a/src/query/src/datafusion.rs +++ b/src/query/src/datafusion.rs @@ -161,9 +161,9 @@ impl DatafusionQueryEngine { } #[tracing::instrument(skip_all)] - async fn delete<'a>( + async fn delete( &self, - table_name: &ResolvedTableReference<'a>, + table_name: &ResolvedTableReference, table: &TableRef, column_vectors: HashMap, query_ctx: QueryContextRef, @@ -205,9 +205,9 @@ impl DatafusionQueryEngine { } #[tracing::instrument(skip_all)] - async fn insert<'a>( + async fn insert( &self, - table_name: &ResolvedTableReference<'a>, + table_name: &ResolvedTableReference, column_vectors: HashMap, query_ctx: QueryContextRef, ) -> Result { @@ -226,7 +226,7 @@ impl DatafusionQueryEngine { .context(TableMutationSnafu) } - async fn find_table(&self, table_name: &ResolvedTableReference<'_>) -> Result { + async fn find_table(&self, table_name: &ResolvedTableReference) -> Result { let catalog_name = table_name.catalog.as_ref(); let schema_name = table_name.schema.as_ref(); let table_name = table_name.table.as_ref(); @@ -309,7 +309,9 @@ impl QueryEngine for DatafusionQueryEngine { } fn engine_context(&self, query_ctx: QueryContextRef) -> QueryEngineContext { - QueryEngineContext::new(self.state.session_state(), query_ctx) + let mut state = self.state.session_state(); + state.config_mut().set_extension(query_ctx.clone()); + QueryEngineContext::new(state, query_ctx) } } @@ -435,7 +437,7 @@ impl QueryExecutor for DatafusionQueryEngine { let exec_timer = metrics::EXEC_PLAN_ELAPSED.start_timer(); let task_ctx = ctx.build_task_ctx(); - match plan.output_partitioning().partition_count() { + match plan.properties().output_partitioning().partition_count() { 0 => Ok(Box::pin(EmptyRecordBatchStream::new(plan.schema()))), 1 => { let stream = plan @@ -453,7 +455,7 @@ impl QueryExecutor for DatafusionQueryEngine { // merge into a single partition let plan = CoalescePartitionsExec::new(df_plan.clone()); // CoalescePartitionsExec must produce a single partition - assert_eq!(1, plan.output_partitioning().partition_count()); + assert_eq!(1, plan.properties().output_partitioning().partition_count()); let df_stream = plan .execute(0, task_ctx) .context(error::DatafusionSnafu) @@ -475,7 +477,6 @@ impl QueryExecutor for DatafusionQueryEngine { #[cfg(test)] mod tests { - use std::borrow::Cow::Borrowed; use std::sync::Arc; use catalog::RegisterTableRequest; @@ -576,9 +577,9 @@ mod tests { .unwrap(); let table = engine .find_table(&ResolvedTableReference { - catalog: Borrowed("greptime"), - schema: Borrowed("public"), - table: Borrowed("numbers"), + catalog: "greptime".into(), + schema: "public".into(), + table: "numbers".into(), }) .await .unwrap(); diff --git a/src/query/src/datafusion/planner.rs b/src/query/src/datafusion/planner.rs index 67ec4c8a3c..1ff9770a56 100644 --- a/src/query/src/datafusion/planner.rs +++ b/src/query/src/datafusion/planner.rs @@ -23,13 +23,13 @@ use common_query::logical_plan::create_aggregate_function; use datafusion::catalog::TableReference; use datafusion::error::Result as DfResult; use datafusion::execution::context::SessionState; -use datafusion::physical_plan::udaf::AggregateUDF; use datafusion::physical_plan::udf::ScalarUDF; use datafusion::sql::planner::ContextProvider; +use 
datafusion::variable::VarType; use datafusion_common::config::ConfigOptions; -use datafusion_common::{DataFusionError, OwnedTableReference}; -use datafusion_expr::{TableSource, WindowUDF}; -use datafusion_physical_expr::var_provider::{is_system_variables, VarType}; +use datafusion_common::DataFusionError; +use datafusion_expr::var_provider::is_system_variables; +use datafusion_expr::{AggregateUDF, TableSource, WindowUDF}; use datafusion_sql::parser::Statement as DfStatement; use session::context::QueryContextRef; use snafu::ResultExt; @@ -79,7 +79,7 @@ impl DfContextProviderAdapter { } async fn resolve_tables( - table_names: Vec, + table_names: Vec, table_provider: &mut DfTableSourceProvider, ) -> Result>> { let mut tables = HashMap::with_capacity(table_names.len()); @@ -102,7 +102,7 @@ async fn resolve_tables( } impl ContextProvider for DfContextProviderAdapter { - fn get_table_provider(&self, name: TableReference) -> DfResult> { + fn get_table_source(&self, name: TableReference) -> DfResult> { let table_ref = self.table_provider.resolve_table_ref(name)?; self.tables .get(&table_ref.to_string()) @@ -159,4 +159,19 @@ impl ContextProvider for DfContextProviderAdapter { fn options(&self) -> &ConfigOptions { self.session_state.config_options() } + + fn udfs_names(&self) -> Vec { + // TODO(LFC): Impl it. + vec![] + } + + fn udafs_names(&self) -> Vec { + // TODO(LFC): Impl it. + vec![] + } + + fn udwfs_names(&self) -> Vec { + // TODO(LFC): Impl it. + vec![] + } } diff --git a/src/query/src/dist_plan/analyzer.rs b/src/query/src/dist_plan/analyzer.rs index 10c2c2e9e2..870b926339 100644 --- a/src/query/src/dist_plan/analyzer.rs +++ b/src/query/src/dist_plan/analyzer.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use datafusion::datasource::DefaultTableSource; use datafusion::error::Result as DfResult; use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::{RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter}; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::{col, Expr, LogicalPlan, LogicalPlanBuilder, Subquery}; use datafusion_optimizer::analyzer::AnalyzerRule; @@ -47,12 +47,12 @@ impl AnalyzerRule for DistPlannerAnalyzer { // preprocess the input plan let optimizer_context = OptimizerContext::new(); let plan = SimplifyExpressions::new() - .try_optimize(&plan, &optimizer_context)? - .unwrap_or(plan); + .rewrite(plan, &optimizer_context)? 
+ .data; let plan = plan.transform(&Self::inspect_plan_with_subquery)?; let mut rewriter = PlanRewriter::default(); - let result = plan.rewrite(&mut rewriter)?; + let result = plan.data.rewrite(&mut rewriter)?.data; Ok(result) } @@ -63,35 +63,40 @@ impl DistPlannerAnalyzer { let exprs = plan .expressions() .into_iter() - .map(|e| e.transform(&Self::transform_subquery)) + .map(|e| e.transform(&Self::transform_subquery).map(|x| x.data)) .collect::>>()?; let inputs = plan.inputs().into_iter().cloned().collect::>(); - Ok(Transformed::Yes(plan.with_new_exprs(exprs, &inputs)?)) + Ok(Transformed::yes(plan.with_new_exprs(exprs, inputs)?)) } fn transform_subquery(expr: Expr) -> DfResult> { match expr { - Expr::Exists(exists) => Ok(Transformed::Yes(Expr::Exists(Exists { + Expr::Exists(exists) => Ok(Transformed::yes(Expr::Exists(Exists { subquery: Self::handle_subquery(exists.subquery)?, negated: exists.negated, }))), - Expr::InSubquery(in_subquery) => Ok(Transformed::Yes(Expr::InSubquery(InSubquery { + Expr::InSubquery(in_subquery) => Ok(Transformed::yes(Expr::InSubquery(InSubquery { expr: in_subquery.expr, subquery: Self::handle_subquery(in_subquery.subquery)?, negated: in_subquery.negated, }))), - Expr::ScalarSubquery(scalar_subquery) => Ok(Transformed::Yes(Expr::ScalarSubquery( + Expr::ScalarSubquery(scalar_subquery) => Ok(Transformed::yes(Expr::ScalarSubquery( Self::handle_subquery(scalar_subquery)?, ))), - _ => Ok(Transformed::No(expr)), + _ => Ok(Transformed::no(expr)), } } fn handle_subquery(subquery: Subquery) -> DfResult { let mut rewriter = PlanRewriter::default(); - let mut rewrote_subquery = subquery.subquery.as_ref().clone().rewrite(&mut rewriter)?; + let mut rewrote_subquery = subquery + .subquery + .as_ref() + .clone() + .rewrite(&mut rewriter)? + .data; // Workaround. DF doesn't support the first plan in subquery to be an Extension if matches!(rewrote_subquery, LogicalPlan::Extension(_)) { let output_schema = rewrote_subquery.schema().clone(); @@ -232,35 +237,34 @@ impl PlanRewriter { } impl TreeNodeRewriter for PlanRewriter { - type N = LogicalPlan; + type Node = LogicalPlan; /// descend - fn pre_visit<'a>(&'a mut self, node: &'a Self::N) -> DfResult { + fn f_down<'a>(&mut self, node: Self::Node) -> DfResult> { self.level += 1; self.stack.push((node.clone(), self.level)); // decendening will clear the stage self.stage.clear(); self.set_unexpanded(); self.partition_cols = None; - - Ok(RewriteRecursion::Continue) + Ok(Transformed::no(node)) } /// ascend /// /// Besure to call `pop_stack` before returning - fn mutate(&mut self, node: Self::N) -> DfResult { + fn f_up(&mut self, node: Self::Node) -> DfResult> { // only expand once on each ascending if self.is_expanded() { self.pop_stack(); - return Ok(node); + return Ok(Transformed::no(node)); } // only expand when the leaf is table scan if node.inputs().is_empty() && !matches!(node, LogicalPlan::TableScan(_)) { self.set_expanded(); self.pop_stack(); - return Ok(node); + return Ok(Transformed::no(node)); } self.maybe_set_partitions(&node); @@ -270,12 +274,12 @@ impl TreeNodeRewriter for PlanRewriter { let mut node = MergeScanLogicalPlan::new(node, false).into_logical_plan(); // expand stages for new_stage in self.stage.drain(..) { - node = new_stage.with_new_inputs(&[node])? + node = new_stage.with_new_exprs(node.expressions(), vec![node.clone()])? 
} self.set_expanded(); self.pop_stack(); - return Ok(node); + return Ok(Transformed::yes(node)); }; // TODO(ruihang): avoid this clone @@ -285,16 +289,16 @@ impl TreeNodeRewriter for PlanRewriter { let mut node = MergeScanLogicalPlan::new(node, false).into_logical_plan(); // expand stages for new_stage in self.stage.drain(..) { - node = new_stage.with_new_inputs(&[node])? + node = new_stage.with_new_exprs(node.expressions(), vec![node.clone()])? } self.set_expanded(); self.pop_stack(); - return Ok(node); + return Ok(Transformed::yes(node)); } self.pop_stack(); - Ok(node) + Ok(Transformed::no(node)) } } diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index 5bb125e1ba..f59dc57268 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -107,6 +107,7 @@ impl Categorizer { LogicalPlan::Dml(_) => Commutativity::Unsupported, LogicalPlan::Ddl(_) => Commutativity::Unsupported, LogicalPlan::Copy(_) => Commutativity::Unsupported, + LogicalPlan::RecursiveQuery(_) => Commutativity::Unsupported, } } @@ -142,8 +143,7 @@ impl Categorizer { | Expr::Between(_) | Expr::Sort(_) | Expr::Exists(_) - | Expr::ScalarFunction(_) - | Expr::ScalarUDF(_) => Commutativity::Commutative, + | Expr::ScalarFunction(_) => Commutativity::Commutative, Expr::Like(_) | Expr::SimilarTo(_) @@ -155,14 +155,13 @@ impl Categorizer { | Expr::TryCast(_) | Expr::AggregateFunction(_) | Expr::WindowFunction(_) - | Expr::AggregateUDF(_) | Expr::InList(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) - | Expr::Wildcard => Commutativity::Unimplemented, + | Expr::Wildcard { .. } => Commutativity::Unimplemented, Expr::Alias(_) - | Expr::QualifiedWildcard { .. } + | Expr::Unnest(_) | Expr::GroupingSet(_) | Expr::Placeholder(_) | Expr::OuterReferenceColumn(_, _) => Commutativity::Unimplemented, diff --git a/src/query/src/dist_plan/merge_scan.rs b/src/query/src/dist_plan/merge_scan.rs index ed31ecb0c8..1f294a836b 100644 --- a/src/query/src/dist_plan/merge_scan.rs +++ b/src/query/src/dist_plan/merge_scan.rs @@ -33,15 +33,18 @@ use common_telemetry::tracing_context::TracingContext; use datafusion::physical_plan::metrics::{ Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, MetricsSet, Time, }; -use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning}; -use datafusion_common::{Result, Statistics}; +use datafusion::physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, +}; +use datafusion_common::Result; use datafusion_expr::{Extension, LogicalPlan, UserDefinedLogicalNodeCore}; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::EquivalenceProperties; use datatypes::schema::{Schema, SchemaRef}; use futures_util::StreamExt; use greptime_proto::v1::region::{QueryRequest, RegionRequestHeader}; use meter_core::data::ReadItem; use meter_macros::read_meter; +use session::context::QueryContextRef; use snafu::ResultExt; use store_api::storage::RegionId; use tokio::time::Instant; @@ -123,6 +126,8 @@ pub struct MergeScanExec { arrow_schema: ArrowSchemaRef, region_query_handler: RegionQueryHandlerRef, metric: ExecutionPlanMetricsSet, + properties: PlanProperties, + query_ctx: QueryContextRef, } impl std::fmt::Debug for MergeScanExec { @@ -142,8 +147,14 @@ impl MergeScanExec { substrait_plan: Bytes, arrow_schema: &ArrowSchema, region_query_handler: RegionQueryHandlerRef, + query_ctx: QueryContextRef, ) -> Result { let arrow_schema_without_metadata 
= Self::arrow_schema_without_metadata(arrow_schema); + let properties = PlanProperties::new( + EquivalenceProperties::new(arrow_schema_without_metadata.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); let schema_without_metadata = Self::arrow_schema_to_schema(arrow_schema_without_metadata.clone())?; Ok(Self { @@ -154,6 +165,8 @@ impl MergeScanExec { arrow_schema: arrow_schema_without_metadata, region_query_handler, metric: ExecutionPlanMetricsSet::new(), + properties, + query_ctx, }) } @@ -166,6 +179,7 @@ impl MergeScanExec { let dbname = context.task_id().unwrap_or_default(); let tracing_context = TracingContext::from_json(context.session_id().as_str()); + let tz = self.query_ctx.timezone().to_string(); let stream = Box::pin(stream!({ MERGE_SCAN_REGIONS.observe(regions.len() as f64); @@ -178,6 +192,7 @@ impl MergeScanExec { header: Some(RegionRequestHeader { tracing_context: tracing_context.to_w3c(), dbname: dbname.clone(), + timezone: tz.clone(), }), region_id: region_id.into(), plan: substrait_plan.clone(), @@ -266,12 +281,8 @@ impl ExecutionPlan for MergeScanExec { self.arrow_schema.clone() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - None + fn properties(&self) -> &PlanProperties { + &self.properties } fn children(&self) -> Vec> { @@ -297,10 +308,6 @@ impl ExecutionPlan for MergeScanExec { ))) } - fn statistics(&self) -> Statistics { - Statistics::default() - } - fn metrics(&self) -> Option { Some(self.metric.clone_inner()) } diff --git a/src/query/src/dist_plan/planner.rs b/src/query/src/dist_plan/planner.rs index c3e0cca94e..1d29fe7aba 100644 --- a/src/query/src/dist_plan/planner.rs +++ b/src/query/src/dist_plan/planner.rs @@ -25,10 +25,11 @@ use datafusion::datasource::DefaultTableSource; use datafusion::execution::context::SessionState; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner}; -use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeVisitor, VisitRecursion}; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::TableReference; use datafusion_expr::{LogicalPlan, UserDefinedLogicalNode}; use datafusion_optimizer::analyzer::Analyzer; +use session::context::QueryContext; use snafu::{OptionExt, ResultExt}; use store_api::storage::RegionId; use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; @@ -103,12 +104,18 @@ impl ExtensionPlanner for DistExtensionPlanner { .encode(&amended_plan) .context(error::EncodeSubstraitLogicalPlanSnafu)? 
.into(); + + let query_ctx = session_state + .config() + .get_extension() + .unwrap_or_else(QueryContext::arc); let merge_scan_plan = MergeScanExec::new( table_name, regions, substrait_plan, &schema, self.region_query_handler.clone(), + query_ctx, )?; Ok(Some(Arc::new(merge_scan_plan) as _)) } @@ -125,6 +132,7 @@ impl DistExtensionPlanner { /// Apply the fully resolved table name to the TableScan plan fn plan_with_full_table_name(plan: LogicalPlan, name: &TableName) -> Result { plan.transform(&|plan| TableNameRewriter::rewrite_table_name(plan, name)) + .map(|x| x.data) } async fn get_regions(&self, table_name: &TableName) -> Result> { @@ -163,9 +171,9 @@ struct TableNameExtractor { } impl TreeNodeVisitor for TableNameExtractor { - type N = LogicalPlan; + type Node = LogicalPlan; - fn pre_visit(&mut self, node: &Self::N) -> Result { + fn f_down(&mut self, node: &Self::Node) -> Result { match node { LogicalPlan::TableScan(scan) => { if let Some(source) = scan.source.as_any().downcast_ref::() { @@ -182,7 +190,7 @@ impl TreeNodeVisitor for TableNameExtractor { info.name.clone(), )); } - return Ok(VisitRecursion::Stop); + return Ok(TreeNodeRecursion::Stop); } } match &scan.table_name { @@ -192,32 +200,32 @@ impl TreeNodeVisitor for TableNameExtractor { table, } => { self.table_name = Some(TableName::new( - catalog.clone(), - schema.clone(), - table.clone(), + catalog.to_string(), + schema.to_string(), + table.to_string(), )); - Ok(VisitRecursion::Stop) + Ok(TreeNodeRecursion::Stop) } // TODO(ruihang): Maybe the following two cases should not be valid TableReference::Partial { schema, table } => { self.table_name = Some(TableName::new( DEFAULT_CATALOG_NAME.to_string(), - schema.clone(), - table.clone(), + schema.to_string(), + table.to_string(), )); - Ok(VisitRecursion::Stop) + Ok(TreeNodeRecursion::Stop) } TableReference::Bare { table } => { self.table_name = Some(TableName::new( DEFAULT_CATALOG_NAME.to_string(), DEFAULT_SCHEMA_NAME.to_string(), - table.clone(), + table.to_string(), )); - Ok(VisitRecursion::Stop) + Ok(TreeNodeRecursion::Stop) } } } - _ => Ok(VisitRecursion::Continue), + _ => Ok(TreeNodeRecursion::Continue), } } } @@ -236,9 +244,9 @@ impl TableNameRewriter { name.schema_name.clone(), name.table_name.clone(), ); - Transformed::Yes(LogicalPlan::TableScan(table_scan)) + Transformed::yes(LogicalPlan::TableScan(table_scan)) } - _ => Transformed::No(plan), + _ => Transformed::no(plan), }) } } diff --git a/src/query/src/metrics.rs b/src/query/src/metrics.rs index ac9397eec3..d091f9c91b 100644 --- a/src/query/src/metrics.rs +++ b/src/query/src/metrics.rs @@ -76,6 +76,10 @@ impl OnDone { } impl RecordBatchStream for OnDone { + fn name(&self) -> &str { + self.stream.name() + } + fn schema(&self) -> SchemaRef { self.stream.schema() } diff --git a/src/query/src/optimizer/order_hint.rs b/src/query/src/optimizer/order_hint.rs index b9a34df0a8..6c027568a2 100644 --- a/src/query/src/optimizer/order_hint.rs +++ b/src/query/src/optimizer/order_hint.rs @@ -15,7 +15,7 @@ use arrow_schema::SortOptions; use common_recordbatch::OrderOption; use datafusion::datasource::DefaultTableSource; -use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeVisitor, VisitRecursion}; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::Result as DataFusionResult; use datafusion_expr::expr::Sort; use datafusion_expr::{Expr, LogicalPlan}; @@ -48,6 +48,7 @@ impl OrderHintRule { if let Some(order_expr) = visitor.order_expr.take() { plan.clone() 
.transform_down(&|plan| Self::set_ordering_hint(plan, &order_expr)) + .map(|x| x.data) } else { Ok(plan.clone()) } @@ -74,7 +75,7 @@ impl OrderHintRule { for sort in order_expr { let name = match sort.expr.try_into_col() { Ok(col) => col.name, - Err(_) => return Ok(Transformed::No(plan)), + Err(_) => return Ok(Transformed::no(plan)), }; opts.push(OrderOption { name, @@ -89,12 +90,12 @@ impl OrderHintRule { } } if transformed { - Ok(Transformed::Yes(plan)) + Ok(Transformed::yes(plan)) } else { - Ok(Transformed::No(plan)) + Ok(Transformed::no(plan)) } } - _ => Ok(Transformed::No(plan)), + _ => Ok(Transformed::no(plan)), } } } @@ -106,9 +107,9 @@ struct OrderHintVisitor { } impl TreeNodeVisitor for OrderHintVisitor { - type N = LogicalPlan; + type Node = LogicalPlan; - fn pre_visit(&mut self, node: &Self::N) -> DataFusionResult { + fn f_down(&mut self, node: &Self::Node) -> DataFusionResult { if let LogicalPlan::Sort(sort) = node { let mut exprs = vec![]; for expr in &sort.expr { @@ -118,7 +119,7 @@ impl TreeNodeVisitor for OrderHintVisitor { } self.order_expr = Some(exprs); } - Ok(VisitRecursion::Continue) + Ok(TreeNodeRecursion::Continue) } } diff --git a/src/query/src/optimizer/string_normalization.rs b/src/query/src/optimizer/string_normalization.rs index 1dfcc4d518..56fb36c3df 100644 --- a/src/query/src/optimizer/string_normalization.rs +++ b/src/query/src/optimizer/string_normalization.rs @@ -32,10 +32,11 @@ impl AnalyzerRule for StringNormalizationRule { let expr = plan .expressions() .into_iter() - .map(|e| e.rewrite(&mut converter)) + .map(|e| e.rewrite(&mut converter).map(|x| x.data)) .collect::>>()?; - plan.with_new_exprs(expr, &inputs).map(Transformed::Yes) + plan.with_new_exprs(expr, inputs).map(Transformed::yes) }) + .map(|x| x.data) } fn name(&self) -> &str { @@ -46,12 +47,12 @@ impl AnalyzerRule for StringNormalizationRule { struct StringNormalizationConverter; impl TreeNodeRewriter for StringNormalizationConverter { - type N = Expr; + type Node = Expr; /// remove extra whitespaces from the String value when /// there is a CAST from a String to Timestamp. 
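// Illustrative sketch (not part of the patch): the TreeNode API now names its hooks
// `f_down`/`f_up` (replacing `pre_visit`/`mutate`), every hook returns
// `Transformed<Node>`, and `rewrite`/`transform` hand back a `Transformed` wrapper
// whose payload is read through `.data`, which is why the hunks in this file and in
// the analyzer add `.map(|x| x.data)`. A tiny rewriter in the new style (the rule
// itself is made up for the example):
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::Result as DataFusionResult;
use datafusion_expr::{lit, Expr};

struct ZeroOutLiterals;

impl TreeNodeRewriter for ZeroOutLiterals {
    type Node = Expr;

    // `f_down` has a default implementation, so only the bottom-up hook is needed here.
    fn f_up(&mut self, expr: Expr) -> DataFusionResult<Transformed<Expr>> {
        match expr {
            Expr::Literal(_) => Ok(Transformed::yes(lit(0i64))),
            other => Ok(Transformed::no(other)),
        }
    }
}

fn rewrite_expr(expr: Expr) -> DataFusionResult<Expr> {
    // `rewrite` reports whether anything changed; callers unwrap the value via `.data`.
    Ok(expr.rewrite(&mut ZeroOutLiterals)?.data)
}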
/// Otherwise - no modifications applied - fn mutate(&mut self, expr: Expr) -> Result { + fn f_up(&mut self, expr: Expr) -> Result> { let new_expr = match expr { Expr::Cast(Cast { expr, data_type }) => { let expr = match data_type { @@ -71,7 +72,7 @@ impl TreeNodeRewriter for StringNormalizationConverter { } expr => expr, }; - Ok(new_expr) + Ok(Transformed::yes(new_expr)) } } diff --git a/src/query/src/optimizer/type_conversion.rs b/src/query/src/optimizer/type_conversion.rs index aa1fc73d4c..883d1aff5f 100644 --- a/src/query/src/optimizer/type_conversion.rs +++ b/src/query/src/optimizer/type_conversion.rs @@ -48,8 +48,8 @@ impl ExtensionAnalyzerRule for TypeConversionRule { schema: filter.input.schema().clone(), query_ctx: ctx.query_ctx(), }; - let rewritten = filter.predicate.clone().rewrite(&mut converter)?; - Ok(Transformed::Yes(LogicalPlan::Filter(Filter::try_new( + let rewritten = filter.predicate.clone().rewrite(&mut converter)?.data; + Ok(Transformed::yes(LogicalPlan::Filter(Filter::try_new( rewritten, filter.input, )?))) @@ -68,9 +68,9 @@ impl ExtensionAnalyzerRule for TypeConversionRule { }; let rewrite_filters = filters .into_iter() - .map(|e| e.rewrite(&mut converter)) + .map(|e| e.rewrite(&mut converter).map(|x| x.data)) .collect::>>()?; - Ok(Transformed::Yes(LogicalPlan::TableScan(TableScan { + Ok(Transformed::yes(LogicalPlan::TableScan(TableScan { table_name: table_name.clone(), source: source.clone(), projection, @@ -100,10 +100,10 @@ impl ExtensionAnalyzerRule for TypeConversionRule { let expr = plan .expressions() .into_iter() - .map(|e| e.rewrite(&mut converter)) + .map(|e| e.rewrite(&mut converter).map(|x| x.data)) .collect::>>()?; - plan.with_new_exprs(expr, &inputs).map(Transformed::Yes) + plan.with_new_exprs(expr, inputs).map(Transformed::yes) } LogicalPlan::Subquery { .. 
} @@ -116,8 +116,10 @@ impl ExtensionAnalyzerRule for TypeConversionRule { | LogicalPlan::Unnest(_) | LogicalPlan::Statement(_) | LogicalPlan::Ddl(_) - | LogicalPlan::Copy(_) => Ok(Transformed::No(plan)), + | LogicalPlan::Copy(_) + | LogicalPlan::RecursiveQuery(_) => Ok(Transformed::no(plan)), }) + .map(|x| x.data) } fn name(&self) -> &str { @@ -155,9 +157,9 @@ impl TypeConverter { _ => Ok(ScalarValue::Boolean(None)), }, (target_type, value) => { - let value_arr = value.to_array(); - let arr = - compute::cast(&value_arr, target_type).map_err(DataFusionError::ArrowError)?; + let value_arr = value.to_array()?; + let arr = compute::cast(&value_arr, target_type) + .map_err(|e| DataFusionError::ArrowError(e, None))?; ScalarValue::try_from_array( &arr, @@ -207,9 +209,9 @@ impl TypeConverter { } impl TreeNodeRewriter for TypeConverter { - type N = Expr; + type Node = Expr; - fn mutate(&mut self, expr: Expr) -> Result { + fn f_up(&mut self, expr: Expr) -> Result> { let new_expr = match expr { Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { Operator::Eq @@ -275,7 +277,7 @@ impl TreeNodeRewriter for TypeConverter { }, expr => expr, }; - Ok(new_expr) + Ok(Transformed::yes(new_expr)) } } @@ -310,7 +312,9 @@ mod tests { use std::sync::Arc; use datafusion::logical_expr::expr::AggregateFunction as AggrExpr; - use datafusion_common::{Column, DFField, DFSchema}; + use datafusion_common::arrow::datatypes::Field; + use datafusion_common::{Column, DFSchema}; + use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::{AggregateFunction, LogicalPlanBuilder}; use datafusion_sql::TableReference; use session::context::QueryContext; @@ -390,11 +394,13 @@ mod tests { let schema = Arc::new( DFSchema::new_with_metadata( - vec![DFField::new( + vec![( None::, - "ts", - DataType::Timestamp(ArrowTimeUnit::Millisecond, None), - true, + Arc::new(Field::new( + "ts", + DataType::Timestamp(ArrowTimeUnit::Millisecond, None), + true, + )), )], HashMap::new(), ) @@ -411,12 +417,13 @@ mod tests { None ))), converter - .mutate( + .f_up( Expr::Column(Column::from_name("ts")).gt(Expr::Literal(ScalarValue::Utf8( Some("2020-09-08T05:42:29+08:00".to_string()), ))) ) .unwrap() + .data ); } @@ -425,11 +432,9 @@ mod tests { let col_name = "is_valid"; let schema = Arc::new( DFSchema::new_with_metadata( - vec![DFField::new( + vec![( None::, - col_name, - DataType::Boolean, - false, + Arc::new(Field::new(col_name, DataType::Boolean, false)), )], HashMap::new(), ) @@ -444,11 +449,12 @@ mod tests { Expr::Column(Column::from_name(col_name)) .eq(Expr::Literal(ScalarValue::Boolean(Some(true)))), converter - .mutate( + .f_up( Expr::Column(Column::from_name(col_name)) .eq(Expr::Literal(ScalarValue::Utf8(Some("true".to_string())))) ) .unwrap() + .data ); } @@ -475,11 +481,12 @@ mod tests { .aggregate( Vec::::new(), vec![Expr::AggregateFunction(AggrExpr { - fun: AggregateFunction::Count, + func_def: AggregateFunctionDefinition::BuiltIn(AggregateFunction::Count), args: vec![Expr::Column(Column::from_name("column1"))], distinct: false, filter: None, order_by: None, + null_treatment: None, })], ) .unwrap() diff --git a/src/query/src/parser.rs b/src/query/src/parser.rs index 4da25d3649..859ae924e7 100644 --- a/src/query/src/parser.rs +++ b/src/query/src/parser.rs @@ -305,9 +305,9 @@ mod test { sort_by: [], \ having: None, \ named_window: [], \ - qualify: None \ - }), order_by: [], limit: None, offset: None, fetch: None, locks: [] } }))"); - + qualify: None, \ + value_table_mode: None \ + }), order_by: [], limit: None, 
limit_by: [], offset: None, fetch: None, locks: [], for_clause: None } }))"); assert_eq!(format!("{stmt:?}"), expected); } diff --git a/src/query/src/plan.rs b/src/query/src/plan.rs index 4462302d98..0e2dd710e7 100644 --- a/src/query/src/plan.rs +++ b/src/query/src/plan.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::fmt::{Debug, Display}; use common_query::prelude::ScalarValue; +use datafusion_common::ParamValues; use datafusion_expr::LogicalPlan as DfLogicalPlan; use datatypes::data_type::ConcreteDataType; use datatypes::schema::Schema; @@ -82,7 +83,7 @@ impl LogicalPlan { let LogicalPlan::DfPlan(plan) = self; plan.clone() - .replace_params_with_values(values) + .replace_params_with_values(&ParamValues::List(values.to_vec())) .context(DataFusionSnafu) .map(LogicalPlan::DfPlan) } diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs index 18af09973e..96e8825518 100644 --- a/src/query/src/query_engine/state.rs +++ b/src/query/src/query_engine/state.rs @@ -26,7 +26,6 @@ use common_function::state::FunctionState; use common_query::physical_plan::SessionContext; use common_query::prelude::ScalarUdf; use common_telemetry::warn; -use datafusion::catalog::MemoryCatalogList; use datafusion::dataframe::DataFrame; use datafusion::error::Result as DfResult; use datafusion::execution::context::{QueryPlanner, SessionConfig, SessionState}; @@ -101,18 +100,14 @@ impl QueryEngineState { let mut optimizer = Optimizer::new(); optimizer.rules.push(Arc::new(OrderHintRule)); - let session_state = SessionState::new_with_config_rt_and_catalog_list( - session_config, - runtime_env, - Arc::new(MemoryCatalogList::default()), // pass a dummy catalog list - ) - .with_serializer_registry(Arc::new(ExtensionSerializer)) - .with_analyzer_rules(analyzer.rules) - .with_query_planner(Arc::new(DfQueryPlanner::new( - catalog_list.clone(), - region_query_handler, - ))) - .with_optimizer_rules(optimizer.rules); + let session_state = SessionState::new_with_config_rt(session_config, runtime_env) + .with_serializer_registry(Arc::new(ExtensionSerializer)) + .with_analyzer_rules(analyzer.rules) + .with_query_planner(Arc::new(DfQueryPlanner::new( + catalog_list.clone(), + region_query_handler, + ))) + .with_optimizer_rules(optimizer.rules); let df_context = SessionContext::new_with_state(session_state); diff --git a/src/query/src/range_select/plan.rs b/src/query/src/range_select/plan.rs index bff30d4789..d31097efc0 100644 --- a/src/query/src/range_select/plan.rs +++ b/src/query/src/range_select/plan.rs @@ -33,12 +33,14 @@ use datafusion::execution::context::SessionState; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::udaf::create_aggregate_expr as create_aggr_udf_expr; use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; use datafusion::physical_planner::create_physical_sort_expr; +use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::{get_arrayref_at_indices, get_row_at_idx}; -use datafusion_common::{DFField, DFSchema, DFSchemaRef, DataFusionError, ScalarValue}; +use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, ScalarValue}; +use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::utils::{exprlist_to_fields, COUNT_STAR_EXPANSION}; use datafusion_expr::{ lit, 
Accumulator, AggregateFunction, Expr, ExprSchemable, LogicalPlan, @@ -46,9 +48,9 @@ use datafusion_expr::{ }; use datafusion_physical_expr::aggregate::utils::down_cast_any_ref; use datafusion_physical_expr::expressions::create_aggregate_expr as create_aggr_expr; -use datafusion_physical_expr::hash_utils::create_hashes; use datafusion_physical_expr::{ - create_physical_expr, AggregateExpr, Distribution, PhysicalExpr, PhysicalSortExpr, + create_physical_expr, AggregateExpr, Distribution, EquivalenceProperties, Partitioning, + PhysicalExpr, PhysicalSortExpr, }; use datatypes::arrow::array::{ Array, ArrayRef, TimestampMillisecondArray, TimestampMillisecondBuilder, UInt32Builder, @@ -181,7 +183,7 @@ impl Accumulator for RangeFirstListValueAcc { Ok(()) } - fn evaluate(&self) -> DataFusionResult { + fn evaluate(&mut self) -> DataFusionResult { Ok(self.data.clone().unwrap_or(ScalarValue::Null)) } @@ -189,7 +191,7 @@ impl Accumulator for RangeFirstListValueAcc { std::mem::size_of_val(self) } - fn state(&self) -> DataFusionResult> { + fn state(&mut self) -> DataFusionResult> { unreachable!("Accumulator::state will not be used in range query") } @@ -466,12 +468,13 @@ impl RangeSelect { fill, .. }| { - Ok(DFField::new_unqualified( + let field = Field::new( name, data_type.clone(), // Only when data fill with Const option, the data can't be null !matches!(fill, Some(Fill::Const(..))), - )) + ); + Ok((None, Arc::new(field))) }, ) .collect::>>() @@ -480,11 +483,10 @@ impl RangeSelect { let ts_field = time_index .to_field(input.schema().as_ref()) .context(DataFusionSnafu)?; - let time_index_name = ts_field.name().clone(); + let time_index_name = ts_field.1.name().clone(); fields.push(ts_field); // add by - let by_fields = - exprlist_to_fields(by.iter().collect::>(), &input).context(DataFusionSnafu)?; + let by_fields = exprlist_to_fields(&by, &input).context(DataFusionSnafu)?; fields.extend(by_fields.clone()); let schema_before_project = Arc::new( DFSchema::new_with_metadata(fields, input.schema().metadata().clone()) @@ -502,7 +504,6 @@ impl RangeSelect { if let Expr::Column(column) = project_expr { schema_before_project .index_of_column_by_name(column.relation.as_ref(), &column.name) - .unwrap_or(None) .ok_or(()) } else { Err(()) @@ -513,7 +514,10 @@ impl RangeSelect { let schema = if let Some(project) = &schema_project { let project_field = project .iter() - .map(|i| schema_before_project.fields()[*i].clone()) + .map(|i| { + let f = schema_before_project.qualified_field(*i); + (f.0.cloned(), Arc::new(f.1.clone())) + }) .collect(); Arc::new( DFSchema::new_with_metadata(project_field, input.schema().metadata().clone()) @@ -555,6 +559,8 @@ impl UserDefinedLogicalNodeCore for RangeSelect { self.range_expr .iter() .map(|expr| expr.expr.clone()) + .chain([self.time_expr.clone()]) + .chain(self.by.clone()) .collect() } @@ -578,18 +584,32 @@ impl UserDefinedLogicalNodeCore for RangeSelect { ) } - fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { + fn from_template(&self, exprs: &[Expr], inputs: &[LogicalPlan]) -> Self { assert!(!inputs.is_empty()); - + assert!(exprs.len() == self.range_expr.len() + self.by.len() + 1); + let range_expr = exprs + .iter() + .zip(self.range_expr.iter()) + .map(|(e, range)| RangeFn { + name: range.name.clone(), + data_type: range.data_type.clone(), + expr: e.clone(), + range: range.range, + fill: range.fill.clone(), + need_cast: range.need_cast, + }) + .collect(); + let time_expr = exprs[self.range_expr.len()].clone(); + let by = exprs[self.range_expr.len() + 
1..].to_vec(); Self { align: self.align, align_to: self.align_to, - range_expr: self.range_expr.clone(), + range_expr, input: Arc::new(inputs[0].clone()), time_index: self.time_index.clone(), - time_expr: self.time_expr.clone(), + time_expr, schema: self.schema.clone(), - by: self.by.clone(), + by, by_schema: self.by_schema.clone(), schema_project: self.schema_project.clone(), schema_before_project: self.schema_before_project.clone(), @@ -603,7 +623,6 @@ impl RangeSelect { is_count_aggr: bool, exprs: &[Expr], df_schema: &Arc, - schema: &Schema, session_state: &SessionState, ) -> DfResult>> { exprs @@ -614,13 +633,12 @@ impl RangeSelect { // At this time, aggregate plan has been replaced by a custom range plan, // so `CountWildcardRule` has not been applied. // We manually modify it when creating the physical plan. - Expr::Wildcard if is_count_aggr => create_physical_expr( + Expr::Wildcard { .. } if is_count_aggr => create_physical_expr( &lit(COUNT_STAR_EXPANSION), - df_schema, - schema, + df_schema.as_ref(), session_state.execution_props(), ), - _ => create_physical_expr(e, df_schema, schema, session_state.execution_props()), + _ => create_physical_expr(e, df_schema.as_ref(), session_state.execution_props()), }) .collect::>>() } @@ -650,10 +668,25 @@ impl RangeSelect { .iter() .map(|range_fn| { let expr = match &range_fn.expr { - Expr::AggregateFunction(aggr) - if aggr.fun == AggregateFunction::FirstValue - || aggr.fun == AggregateFunction::LastValue => - { + Expr::AggregateFunction( + aggr @ datafusion_expr::expr::AggregateFunction { + func_def: + AggregateFunctionDefinition::BuiltIn(AggregateFunction::FirstValue), + .. + }, + ) + | Expr::AggregateFunction( + aggr @ datafusion_expr::expr::AggregateFunction { + func_def: + AggregateFunctionDefinition::BuiltIn(AggregateFunction::LastValue), + .. + }, + ) => { + let is_last_value_func = matches!( + aggr.func_def, + AggregateFunctionDefinition::BuiltIn(AggregateFunction::LastValue) + ); + // Because we only need to find the first_value/last_value, // the complexity of sorting the entire batch is O(nlogn). // We can sort the batch with limit 1. @@ -665,13 +698,12 @@ impl RangeSelect { .map(|x| { create_physical_sort_expr( x, - input_dfschema, - &input_schema, + input_dfschema.as_ref(), session_state.execution_props(), ) .map(|expr| { // reverse the last_value sort - if aggr.fun == AggregateFunction::LastValue { + if is_last_value_func { PhysicalSortExpr { expr: expr.expr, options: SortOptions { @@ -689,14 +721,13 @@ impl RangeSelect { // if user not assign order by, time index is needed as default ordering let time_index = create_physical_expr( &self.time_expr, - input_dfschema, - &input_schema, + input_dfschema.as_ref(), session_state.execution_props(), )?; vec![PhysicalSortExpr { expr: time_index, options: SortOptions { - descending: aggr.fun == AggregateFunction::LastValue, + descending: is_last_value_func, nulls_first: false, }, }] @@ -705,7 +736,6 @@ impl RangeSelect { false, &aggr.args, input_dfschema, - &input_schema, session_state, )?; // first_value/last_value has only one param. 
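// Illustrative sketch (not part of the patch): `create_physical_expr` and
// `create_physical_sort_expr` now take only the logical `DFSchema` plus the execution
// props; the parallel arrow `Schema` argument is gone, which is why the hunks above
// and below drop every `&input_schema` parameter. Roughly:
use std::sync::Arc;

use datafusion::execution::context::SessionState;
use datafusion_common::{DFSchema, Result as DataFusionResult};
use datafusion_expr::Expr;
use datafusion_physical_expr::{create_physical_expr, PhysicalExpr};

fn plan_expr(
    expr: &Expr,
    input_dfschema: &DFSchema,
    session_state: &SessionState,
) -> DataFusionResult<Arc<dyn PhysicalExpr>> {
    create_physical_expr(expr, input_dfschema, session_state.execution_props())
}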
@@ -723,8 +753,7 @@ impl RangeSelect { .map(|x| { create_physical_sort_expr( x, - input_dfschema, - &input_schema, + input_dfschema.as_ref(), session_state.execution_props(), ) }) @@ -732,36 +761,39 @@ impl RangeSelect { } else { vec![] }; - let expr = create_aggr_expr( - &aggr.fun, - false, - &self.create_physical_expr_list( - aggr.fun == AggregateFunction::Count, - &aggr.args, - input_dfschema, - &input_schema, - session_state, - )?, - &order_by, - &input_schema, - range_fn.expr.display_name()?, + + let input_phy_exprs = self.create_physical_expr_list( + matches!( + aggr.func_def, + AggregateFunctionDefinition::BuiltIn(AggregateFunction::Count,) + ), + &aggr.args, + input_dfschema, + session_state, )?; - Ok(expr) - } - Expr::AggregateUDF(aggr_udf) => { - let expr = create_aggr_udf_expr( - &aggr_udf.fun, - &self.create_physical_expr_list( + match &aggr.func_def { + AggregateFunctionDefinition::BuiltIn(fun) => create_aggr_expr( + fun, false, - &aggr_udf.args, - input_dfschema, + &input_phy_exprs, + &order_by, &input_schema, - session_state, - )?, - &input_schema, - range_fn.expr.display_name()?, - )?; - Ok(expr) + range_fn.expr.display_name()?, + false, + ), + AggregateFunctionDefinition::UDF(fun) => create_aggr_udf_expr( + fun, + &input_phy_exprs, + &[], + &[], + &input_schema, + range_fn.expr.display_name()?, + false, + ), + f => Err(DataFusionError::NotImplemented(format!( + "Range function from {f:?}" + ))), + } } _ => Err(DataFusionError::Plan(format!( "Unexpected Expr:{} in RangeSelect", @@ -788,24 +820,25 @@ impl RangeSelect { } else { schema_before_project.clone() }; + let by = self.create_physical_expr_list(false, &self.by, input_dfschema, session_state)?; + let cache = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); Ok(Arc::new(RangeSelectExec { input: exec_input, range_exec, align: self.align.as_millis() as Millisecond, align_to: self.align_to, - by: self.create_physical_expr_list( - false, - &self.by, - input_dfschema, - &input_schema, - session_state, - )?, + by, time_index: self.time_index.clone(), schema, by_schema: Arc::new(Schema::new(by_fields)), metric: ExecutionPlanMetricsSet::new(), schema_before_project, schema_project: self.schema_project.clone(), + cache, })) } } @@ -848,6 +881,7 @@ pub struct RangeSelectExec { metric: ExecutionPlanMetricsSet, schema_project: Option>, schema_before_project: SchemaRef, + cache: PlanProperties, } impl DisplayAs for RangeSelectExec { @@ -882,16 +916,12 @@ impl ExecutionPlan for RangeSelectExec { self.schema.clone() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - fn required_input_distribution(&self) -> Vec { vec![Distribution::SinglePartition] } - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.input.output_ordering() + fn properties(&self) -> &PlanProperties { + &self.cache } fn children(&self) -> Vec> { @@ -915,6 +945,7 @@ impl ExecutionPlan for RangeSelectExec { metric: self.metric.clone(), schema_before_project: self.schema_before_project.clone(), schema_project: self.schema_project.clone(), + cache: self.cache.clone(), })) } @@ -963,8 +994,8 @@ impl ExecutionPlan for RangeSelectExec { Some(self.metric.clone_inner()) } - fn statistics(&self) -> Statistics { - self.input.statistics() + fn statistics(&self) -> DataFusionResult { + Ok(Statistics::new_unknown(self.schema.as_ref())) } } @@ -1054,7 +1085,7 @@ impl RangeSelectStream { .iter() .map(|expr| { let value = 
expr.evaluate(batch)?; - Ok(value.into_array(batch.num_rows())) + value.into_array(batch.num_rows()) }) .collect::>>() } @@ -1168,7 +1199,7 @@ impl RangeSelectStream { for SeriesState { row, align_ts_accumulator, - } in self.series_map.values() + } in self.series_map.values_mut() { // skip empty time series if align_ts_accumulator.is_empty() { @@ -1184,8 +1215,8 @@ impl RangeSelectStream { align_ts_accumulator.keys().copied().collect::>() }; for ts in &align_ts { - if let Some(slot) = align_ts_accumulator.get(ts) { - for (column, acc) in all_scalar.iter_mut().zip(slot.iter()) { + if let Some(slot) = align_ts_accumulator.get_mut(ts) { + for (column, acc) in all_scalar.iter_mut().zip(slot.iter_mut()) { column.push(acc.evaluate()?); } } else { @@ -1415,6 +1446,11 @@ mod test { Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true), Field::new("host", DataType::Utf8, true), ])); + let cache = PlanProperties::new( + EquivalenceProperties::new(schema.clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); let range_select_exec = Arc::new(RangeSelectExec { input: memory_exec, range_exec: vec![ @@ -1450,6 +1486,7 @@ mod test { schema_project: None, by_schema: Arc::new(Schema::new(vec![Field::new("host", DataType::Utf8, true)])), metric: ExecutionPlanMetricsSet::new(), + cache, }); let sort_exec = SortExec::new( vec![ diff --git a/src/query/src/range_select/plan_rewrite.rs b/src/query/src/range_select/plan_rewrite.rs index bab81279e7..4035f20a63 100644 --- a/src/query/src/range_select/plan_rewrite.rs +++ b/src/query/src/range_select/plan_rewrite.rs @@ -25,9 +25,8 @@ use common_time::{Interval, Timestamp, Timezone}; use datafusion::datasource::DefaultTableSource; use datafusion::prelude::Column; use datafusion::scalar::ScalarValue; -use datafusion_common::tree_node::{TreeNode, TreeNodeRewriter, VisitRecursion}; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter}; use datafusion_common::{DFSchema, DataFusionError, Result as DFResult}; -use datafusion_expr::expr::ScalarUDF; use datafusion_expr::{ Aggregate, Analyze, Explain, Expr, ExprSchemable, Extension, LogicalPlan, LogicalPlanBuilder, Projection, @@ -163,11 +162,7 @@ fn parse_expr_list(args: &[Expr], start: usize, len: usize) -> DFResult args[i].clone(), other => { return Err(dispose_parse_error(*other)); @@ -195,11 +190,11 @@ macro_rules! 
inconsistent_check { } impl<'a> TreeNodeRewriter for RangeExprRewriter<'a> { - type N = Expr; + type Node = Expr; - fn mutate(&mut self, node: Expr) -> DFResult { - if let Expr::ScalarUDF(func) = &node { - if func.fun.name == "range_fn" { + fn f_down(&mut self, node: Expr) -> DFResult> { + if let Expr::ScalarFunction(func) = &node { + if func.name() == "range_fn" { // `range_fn(func, range, fill, byc, [byv], align, to)` // `[byv]` are variadic arguments, byc indicate the length of arguments let range_expr = self.get_range_expr(&func.args, 0)?; @@ -246,10 +241,10 @@ impl<'a> TreeNodeRewriter for RangeExprRewriter<'a> { }; let alias = Expr::Column(Column::from_name(range_fn.name.clone())); self.range_fn.insert(range_fn); - return Ok(alias); + return Ok(Transformed::yes(alias)); } } - Ok(node) + Ok(Transformed::no(node)) } } @@ -317,7 +312,7 @@ impl RangePlanRewriter { }; let new_expr = expr .iter() - .map(|expr| expr.clone().rewrite(&mut range_rewriter)) + .map(|expr| expr.clone().rewrite(&mut range_rewriter).map(|x| x.data)) .collect::>>() .context(DataFusionSnafu)?; if range_rewriter.by.is_empty() { @@ -385,7 +380,7 @@ impl RangePlanRewriter { .context(DataFusionSnafu)? .build() } - _ => plan.with_new_inputs(&inputs), + _ => plan.with_new_exprs(plan.expressions(), inputs), } .context(DataFusionSnafu)?; Ok(Some(plan)) @@ -401,10 +396,11 @@ impl RangePlanRewriter { /// If the user does not explicitly use the `by` keyword to indicate time series, /// `[row_columns]` will be use as default time series async fn get_index_by(&mut self, schema: &Arc) -> Result<(Expr, Vec)> { - let mut time_index_expr = Expr::Wildcard; + let mut time_index_expr = Expr::Wildcard { qualifier: None }; let mut default_by = vec![]; - for field in schema.fields() { - if let Some(table_ref) = field.qualifier() { + for i in 0..schema.fields().len() { + let (qualifier, _) = schema.qualified_field(i); + if let Some(table_ref) = qualifier { let table = self .table_provider .resolve_table(table_ref.clone()) @@ -446,7 +442,7 @@ impl RangePlanRewriter { } } } - if time_index_expr == Expr::Wildcard { + if matches!(time_index_expr, Expr::Wildcard { .. }) { TimeIndexNotFoundSnafu { table: schema.to_string(), } @@ -461,13 +457,13 @@ fn have_range_in_exprs(exprs: &[Expr]) -> bool { exprs.iter().any(|expr| { let mut find_range = false; let _ = expr.apply(&mut |expr| { - if let Expr::ScalarUDF(ScalarUDF { fun, .. 
}) = expr { - if fun.name == "range_fn" { + Ok(match expr { + Expr::ScalarFunction(func) if func.name() == "range_fn" => { find_range = true; - return Ok(VisitRecursion::Stop); + TreeNodeRecursion::Stop } - } - Ok(VisitRecursion::Continue) + _ => TreeNodeRecursion::Continue, + }) }); find_range }) @@ -581,8 +577,8 @@ mod test { let query = r#"SELECT (covar(field_0 + field_1, field_1)/4) RANGE '5m' FROM test ALIGN '1h';"#; let expected = String::from( - "Projection: COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4) [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4):Float64;N]\ - \n RangeSelect: range_exprs=[COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1, test.tag_2, test.tag_3, test.tag_4], time_index=timestamp [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8]\ + "Projection: COVAR(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4) [COVAR(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4):Float64;N]\ + \n RangeSelect: range_exprs=[COVAR(test.field_0 + test.field_1,test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1, test.tag_2, test.tag_3, test.tag_4], time_index=timestamp [COVAR(test.field_0 + test.field_1,test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8]\ \n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]" ); query_plan_compare(query, expected).await; @@ -662,7 +658,7 @@ mod test { async fn complex_range_expr() { let query = r#"SELECT gcd(CAST(max(field_0 + 1) Range '5m' FILL NULL AS Int64), CAST(tag_0 AS Int64)) + round(max(field_2+1) Range '6m' FILL NULL + 1) + max(field_2+3) Range '10m' FILL NULL * CAST(tag_1 AS Float64) + 1 FROM test ALIGN '1h' by (tag_0, tag_1);"#; let expected = String::from( - "Projection: gcd(CAST(MAX(test.field_0 + Int64(1)) RANGE 5m FILL NULL AS Int64), CAST(test.tag_0 AS Int64)) + round(MAX(test.field_2 + Int64(1)) RANGE 6m FILL NULL + Int64(1)) + MAX(test.field_2 + Int64(3)) RANGE 10m FILL NULL * CAST(test.tag_1 AS Float64) + Int64(1) [gcd(MAX(test.field_0 + Int64(1)) RANGE 5m FILL NULL,test.tag_0) + round(MAX(test.field_2 + Int64(1)) RANGE 6m FILL NULL + Int64(1)) + MAX(test.field_2 + Int64(3)) RANGE 10m FILL NULL * test.tag_1 + Int64(1):Float64;N]\ + "Projection: gcd(arrow_cast(MAX(test.field_0 + Int64(1)) RANGE 5m FILL NULL, Utf8(\"Int64\")), arrow_cast(test.tag_0, Utf8(\"Int64\"))) + round(MAX(test.field_2 + Int64(1)) RANGE 6m FILL NULL + Int64(1)) + MAX(test.field_2 + Int64(3)) RANGE 10m FILL NULL * arrow_cast(test.tag_1, Utf8(\"Float64\")) + Int64(1) [gcd(arrow_cast(MAX(test.field_0 + Int64(1)) RANGE 5m FILL NULL,Utf8(\"Int64\")),arrow_cast(test.tag_0,Utf8(\"Int64\"))) + round(MAX(test.field_2 + Int64(1)) RANGE 6m FILL NULL + Int64(1)) + MAX(test.field_2 + Int64(3)) RANGE 10m FILL NULL * arrow_cast(test.tag_1,Utf8(\"Float64\")) + Int64(1):Float64;N]\ \n RangeSelect: range_exprs=[MAX(test.field_0 + Int64(1)) RANGE 5m FILL NULL, MAX(test.field_2 + Int64(1)) RANGE 6m FILL NULL, MAX(test.field_2 + Int64(3)) RANGE 10m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], 
time_index=timestamp [MAX(test.field_0 + Int64(1)) RANGE 5m FILL NULL:Float64;N, MAX(test.field_2 + Int64(1)) RANGE 6m FILL NULL:Float64;N, MAX(test.field_2 + Int64(3)) RANGE 10m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\ \n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]" ); @@ -673,7 +669,7 @@ mod test { async fn range_linear_on_integer() { let query = r#"SELECT min(CAST(field_0 AS Int64) + CAST(field_1 AS Int64)) RANGE '5m' FILL LINEAR FROM test ALIGN '1h' by (tag_0,tag_1);"#; let expected = String::from( - "RangeSelect: range_exprs=[MIN(test.field_0 + test.field_1) RANGE 5m FILL LINEAR], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [MIN(test.field_0 + test.field_1) RANGE 5m FILL LINEAR:Float64;N]\ + "RangeSelect: range_exprs=[MIN(arrow_cast(test.field_0,Utf8(\"Int64\")) + arrow_cast(test.field_1,Utf8(\"Int64\"))) RANGE 5m FILL LINEAR], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [MIN(arrow_cast(test.field_0,Utf8(\"Int64\")) + arrow_cast(test.field_1,Utf8(\"Int64\"))) RANGE 5m FILL LINEAR:Float64;N]\ \n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]" ); query_plan_compare(query, expected).await; diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 645bac9f31..87750f3743 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -378,7 +378,7 @@ pub async fn show_index( lit("").alias(INDEX_COMMENT_COLUMN), lit(YES_STR).alias(INDEX_VISIBLE_COLUMN), null().alias(INDEX_EXPRESSION_COLUMN), - Expr::Wildcard, + Expr::Wildcard { qualifier: None }, ]; let projects = vec![ diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index 97d8aba4fb..2004590aa5 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -46,7 +46,7 @@ fn string_value(s: impl Into) -> SqlValue { fn sql_option(name: &str, value: SqlValue) -> SqlOption { SqlOption { name: name.into(), - value, + value: Expr::Value(value), } } @@ -141,6 +141,7 @@ fn create_table_constraints( name: Some(TIME_INDEX.into()), columns: vec![Ident::with_quote(quote_style, column_name)], is_primary: false, + characteristics: None, }); } if !table_meta.primary_key_indices.is_empty() { @@ -152,6 +153,7 @@ fn create_table_constraints( name: None, columns, is_primary: true, + characteristics: None, }); } diff --git a/src/script/Cargo.toml b/src/script/Cargo.toml index 50dca7ccbb..4602a334c9 100644 --- a/src/script/Cargo.toml +++ b/src/script/Cargo.toml @@ -11,6 +11,7 @@ python = [ "dep:datafusion", "dep:datafusion-common", "dep:datafusion-expr", + "dep:datafusion-functions", "dep:datafusion-physical-expr", "dep:rustpython-vm", "dep:rustpython-parser", @@ -45,6 +46,7 @@ crossbeam-utils = "0.8.14" datafusion = { workspace = true, optional = true } datafusion-common = { workspace = true, optional = true } datafusion-expr = { workspace = true, optional = true } +datafusion-functions = { workspace = true, optional = true } datafusion-physical-expr = { workspace = true, optional = true } datatypes.workspace = true futures.workspace = true @@ -54,7 +56,7 @@ paste = { workspace = true, optional = true } 
prometheus.workspace = true query.workspace = true # TODO(discord9): This is a forked and tweaked version of RustPython, please update it to newest original RustPython After RustPython support GC -pyo3 = { version = "0.19", optional = true, features = ["abi3", "abi3-py37"] } +pyo3 = { version = "0.20", optional = true, features = ["abi3", "abi3-py37"] } rustpython-codegen = { git = "https://github.com/discord9/RustPython", optional = true, rev = "9ed5137412" } rustpython-compiler = { git = "https://github.com/discord9/RustPython", optional = true, rev = "9ed5137412" } rustpython-compiler-core = { git = "https://github.com/discord9/RustPython", optional = true, rev = "9ed5137412" } diff --git a/src/script/src/python/engine.rs b/src/script/src/python/engine.rs index 5ebbd202ab..58e54d4b8e 100644 --- a/src/script/src/python/engine.rs +++ b/src/script/src/python/engine.rs @@ -126,6 +126,10 @@ impl Function for PyUDF { } fn signature(&self) -> common_query::prelude::Signature { + if self.copr.arg_types.is_empty() { + return Signature::any(0, Volatility::Volatile); + } + // try our best to get a type signature let mut arg_types = Vec::with_capacity(self.copr.arg_types.len()); let mut know_all_types = true; diff --git a/src/script/src/python/ffi_types/copr.rs b/src/script/src/python/ffi_types/copr.rs index 1af3f416f3..704b0db77d 100644 --- a/src/script/src/python/ffi_types/copr.rs +++ b/src/script/src/python/ffi_types/copr.rs @@ -21,8 +21,10 @@ use std::sync::{Arc, Weak}; use common_query::OutputData; use common_recordbatch::{RecordBatch, RecordBatches}; +use datafusion_common::ScalarValue; use datatypes::arrow::compute; use datatypes::data_type::{ConcreteDataType, DataType}; +use datatypes::prelude::Value; use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::vectors::{Helper, VectorRef}; // use crate::python::builtins::greptime_builtin; @@ -42,7 +44,9 @@ use vm::{pyclass as rspyclass, PyObjectRef, PyPayload, PyResult, VirtualMachine} use super::py_recordbatch::PyRecordBatch; use crate::engine::EvalContext; -use crate::python::error::{ensure, ArrowSnafu, OtherSnafu, Result, TypeCastSnafu}; +use crate::python::error::{ + ensure, ArrowSnafu, DataFusionSnafu, OtherSnafu, Result, TypeCastSnafu, +}; use crate::python::ffi_types::PyVector; #[cfg(feature = "pyo3_backend")] use crate::python::pyo3::pyo3_exec_parsed; @@ -179,6 +183,25 @@ impl Coprocessor { /// check if real types and annotation types(if have) is the same, if not try cast columns to annotated type pub(crate) fn check_and_cast_type(&self, cols: &mut [VectorRef]) -> Result<()> { + for col in cols.iter_mut() { + if let ConcreteDataType::List(x) = col.data_type() { + let values = + ScalarValue::convert_array_to_scalar_vec(col.to_arrow_array().as_ref()) + .context(DataFusionSnafu)? 
+ .into_iter() + .flatten() + .map(Value::try_from) + .collect::, _>>() + .context(TypeCastSnafu)?; + + let mut builder = x.item_type().create_mutable_vector(values.len()); + for v in values.iter() { + builder.push_value_ref(v.as_value_ref()); + } + *col = builder.to_vector(); + } + } + let return_types = &self.return_types; // allow ignore Return Type Annotation if return_types.is_empty() { diff --git a/src/script/src/python/ffi_types/pair_tests.rs b/src/script/src/python/ffi_types/pair_tests.rs index 37e3c76994..781bae9b9e 100644 --- a/src/script/src/python/ffi_types/pair_tests.rs +++ b/src/script/src/python/ffi_types/pair_tests.rs @@ -209,6 +209,7 @@ fn eval_pyo3(case: CodeBlockTestCase) { let res_vec = locals .get_item("ret") .unwrap() + .unwrap() .extract::() .map_err(|e| { dbg!(&case.script); diff --git a/src/script/src/python/ffi_types/vector.rs b/src/script/src/python/ffi_types/vector.rs index 1f75efdecf..bbede552cf 100644 --- a/src/script/src/python/ffi_types/vector.rs +++ b/src/script/src/python/ffi_types/vector.rs @@ -525,17 +525,12 @@ pub fn val_to_pyobj(val: value::Value, vm: &VirtualMachine) -> PyResult { // FIXME(dennis): lose the timestamp unit here Value::Timestamp(v) => vm.ctx.new_int(v.value()).into(), value::Value::List(list) => { - let list = list.items().as_ref(); - match list { - Some(list) => { - let list: Vec<_> = list - .iter() - .map(|v| val_to_pyobj(v.clone(), vm)) - .collect::>()?; - vm.ctx.new_list(list).into() - } - None => vm.ctx.new_list(Vec::new()).into(), - } + let list: Vec<_> = list + .items() + .iter() + .map(|v| val_to_pyobj(v.clone(), vm)) + .collect::>()?; + vm.ctx.new_list(list).into() } #[allow(unreachable_patterns)] _ => return Err(vm.new_type_error(format!("Convert from {val:?} is not supported yet"))), diff --git a/src/script/src/python/pyo3/builtins.rs b/src/script/src/python/pyo3/builtins.rs index c4c10cc0e4..bc8f23107a 100644 --- a/src/script/src/python/pyo3/builtins.rs +++ b/src/script/src/python/pyo3/builtins.rs @@ -19,7 +19,7 @@ use common_function::function_registry::FUNCTION_REGISTRY; use datafusion::arrow::array::{ArrayRef, NullArray}; use datafusion::physical_plan::expressions; use datafusion_expr::ColumnarValue; -use datafusion_physical_expr::{math_expressions, AggregateExpr}; +use datafusion_physical_expr::AggregateExpr; use datatypes::vectors::VectorRef; use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; @@ -133,7 +133,7 @@ fn get_globals(py: Python) -> PyResult<&PyDict> { fn dataframe(py: Python) -> PyResult { let globals = get_globals(py)?; let df = globals - .get_item("__dataframe__") + .get_item("__dataframe__")? .ok_or_else(|| PyKeyError::new_err("No __dataframe__ variable is found"))? .extract::()?; Ok(df) @@ -144,7 +144,7 @@ fn dataframe(py: Python) -> PyResult { pub(crate) fn query_engine(py: Python) -> PyResult { let globals = get_globals(py)?; let query = globals - .get_item("__query__") + .get_item("__query__")? .ok_or_else(|| PyKeyError::new_err("No __query__ variable is found"))? .extract::()?; Ok(query) @@ -237,7 +237,9 @@ macro_rules! 
bind_call_unary_math_function { fn $DF_FUNC(py: Python<'_>, val: PyObject) -> PyResult { let args = &[all_to_f64(try_into_columnar_value(py, val)?).map_err(PyValueError::new_err)?]; - let res = math_expressions::$DF_FUNC(args).map_err(|e| PyValueError::new_err(format!("{e:?}")))?; + let res = datafusion_functions::math::$DF_FUNC() + .invoke(args) + .map_err(|e| PyValueError::new_err(format!("{e:?}")))?; columnar_value_to_py_any(py, res) } )* @@ -293,18 +295,19 @@ fn random(py: Python<'_>, len: usize) -> PyResult { // more info at: https://doc.rust-lang.org/reference/procedural-macros.html#procedural-macro-hygiene let arg = NullArray::new(len); let args = &[ColumnarValue::Array(std::sync::Arc::new(arg) as _)]; - let res = - math_expressions::random(args).map_err(|e| PyValueError::new_err(format!("{e:?}")))?; - + let res = datafusion_functions::math::random() + .invoke(args) + .map_err(|e| PyValueError::new_err(format!("{e:?}")))?; columnar_value_to_py_any(py, res) } #[pyfunction] fn round(py: Python<'_>, val: PyObject) -> PyResult { let value = try_into_columnar_value(py, val)?; - let array = value.into_array(1); - let result = - math_expressions::round(&[array]).map_err(|e| PyValueError::new_err(format!("{e:?}")))?; + let result = datafusion_functions::math::round() + .invoke(&[value]) + .and_then(|x| x.into_array(1)) + .map_err(|e| PyValueError::new_err(format!("{e:?}")))?; columnar_value_to_py_any(py, ColumnarValue::Array(result)) } @@ -368,7 +371,19 @@ fn approx_percentile_cont(py: Python<'_>, values: &PyVector, percent: f64) -> Py ) } -bind_aggr_expr!(array_agg, ArrayAgg,[v0], v0, expr0=>0); +#[pyfunction] +fn array_agg(py: Python<'_>, v: &PyVector) -> PyResult { + eval_df_aggr_expr( + py, + expressions::ArrayAgg::new( + Arc::new(expressions::Column::new("expr0", 0)) as _, + "ArrayAgg", + v.arrow_data_type(), + true, + ), + &[v.to_arrow_array()], + ) +} bind_aggr_expr!(avg, Avg,[v0], v0, expr0=>0); diff --git a/src/script/src/python/pyo3/copr_impl.rs b/src/script/src/python/pyo3/copr_impl.rs index 50c4fd3d23..2dbd574827 100644 --- a/src/script/src/python/pyo3/copr_impl.rs +++ b/src/script/src/python/pyo3/copr_impl.rs @@ -144,7 +144,9 @@ coprocessor = copr // could generate a call in python code and use Python::run to run it, just like in RustPython // Expect either: a PyVector Or a List/Tuple of PyVector py.run(&script, Some(globals), Some(locals))?; - let result = locals.get_item("_return_from_coprocessor").ok_or_else(|| PyValueError::new_err("Can't find return value of coprocessor function"))?; + let result = locals.get_item("_return_from_coprocessor")?.ok_or_else(|| + PyValueError::new_err(format!("cannot find the return value of script '{script}'")) + )?; let col_len = rb.as_ref().map(|rb| rb.num_rows()).unwrap_or(1); py_any_to_vec(result, col_len) diff --git a/src/script/src/python/pyo3/utils.rs b/src/script/src/python/pyo3/utils.rs index 04146a6a6f..9738b8ca9e 100644 --- a/src/script/src/python/pyo3/utils.rs +++ b/src/script/src/python/pyo3/utils.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::{Arc, Mutex}; +use std::sync::Mutex; use arrow::pyarrow::PyArrowException; use common_telemetry::info; @@ -27,7 +27,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{PyBool, PyFloat, PyInt, PyList, PyTuple}; -use crate::python::ffi_types::utils::{collect_diff_types_string, new_item_field}; +use crate::python::ffi_types::utils::collect_diff_types_string; use crate::python::ffi_types::PyVector; use crate::python::metric; use crate::python::pyo3::builtins::greptime_builtins; @@ -75,9 +75,10 @@ pub fn val_to_py_any(py: Python<'_>, val: Value) -> PyResult { Value::DateTime(val) => val.val().to_object(py), Value::Timestamp(val) => val.value().to_object(py), Value::List(val) => { - let list = val.items().clone().unwrap_or(Default::default()); - let list = list - .into_iter() + let list = val + .items() + .iter() + .cloned() .map(|v| val_to_py_any(py, v)) .collect::>>()?; list.to_object(py) @@ -211,9 +212,13 @@ pub fn scalar_value_to_py_any(py: Python<'_>, val: ScalarValue) -> PyResult Ok(py.None()), $(ScalarValue::$scalar_ty(Some(v)) => Ok(v.to_object(py)),)* - ScalarValue::List(Some(col), _) => { + ScalarValue::List(array) => { + let col = ScalarValue::convert_array_to_scalar_vec(array.as_ref()).map_err(|e| + PyValueError::new_err(format!("{e}")) + )?; let list:Vec = col .into_iter() + .flatten() .map(|v| scalar_value_to_py_any(py, v)) .collect::>()?; let list = PyList::new(py, list); @@ -281,8 +286,7 @@ pub fn try_into_columnar_value(py: Python<'_>, obj: PyObject) -> PyResult, obj: PyObject) -> PyResult Stri .unwrap_or_else(|| "Nothing".to_string()) } -fn new_item_field(data_type: ArrowDataType) -> Field { - Field::new("item", data_type, false) -} - /// try to turn a Python Object into a PyVector or a scalar that can be use for calculate /// /// supported scalar are(leftside is python data type, right side is rust type): @@ -119,8 +113,7 @@ pub fn try_into_columnar_value(obj: PyObjectRef, vm: &VirtualMachine) -> PyResul if ret.is_empty() { // TODO(dennis): empty list, we set type as null. return Ok(DFColValue::Scalar(ScalarValue::List( - None, - Arc::new(new_item_field(ArrowDataType::Null)), + ScalarValue::new_list(&[], &ArrowDataType::Null), ))); } @@ -132,8 +125,7 @@ pub fn try_into_columnar_value(obj: PyObjectRef, vm: &VirtualMachine) -> PyResul ))); } Ok(DFColValue::Scalar(ScalarValue::List( - Some(ret), - Arc::new(new_item_field(ty)), + ScalarValue::new_list(&ret, &ty), ))) } else { Err(vm.new_type_error(format!( @@ -176,9 +168,11 @@ fn scalar_val_try_into_py_obj(val: ScalarValue, vm: &VirtualMachine) -> PyResult ScalarValue::Float64(Some(v)) => Ok(PyFloat::from(v).into_pyobject(vm)), ScalarValue::Int64(Some(v)) => Ok(PyInt::from(v).into_pyobject(vm)), ScalarValue::UInt64(Some(v)) => Ok(PyInt::from(v).into_pyobject(vm)), - ScalarValue::List(Some(col), _) => { - let list = col + ScalarValue::List(list) => { + let list = ScalarValue::convert_array_to_scalar_vec(list.as_ref()) + .map_err(|e| from_df_err(e, vm))? .into_iter() + .flatten() .map(|v| scalar_val_try_into_py_obj(v, vm)) .collect::>()?; let list = vm.ctx.new_list(list); @@ -228,9 +222,10 @@ macro_rules! 
bind_call_unary_math_function { ($DF_FUNC: ident, $vm: ident $(,$ARG: ident)*) => { fn inner_fn($($ARG: PyObjectRef,)* vm: &VirtualMachine) -> PyResult { let args = &[$(all_to_f64(try_into_columnar_value($ARG, vm)?, vm)?,)*]; - let res = math_expressions::$DF_FUNC(args).map_err(|err| from_df_err(err, vm))?; - let ret = try_into_py_obj(res, vm)?; - Ok(ret) + datafusion_functions::math::$DF_FUNC() + .invoke(args) + .map_err(|e| from_df_err(e, vm)) + .and_then(|x| try_into_py_obj(x, vm)) } return inner_fn($($ARG,)* $vm); }; @@ -295,7 +290,6 @@ pub(crate) mod greptime_builtin { use datafusion::dataframe::DataFrame as DfDataFrame; use datafusion::physical_plan::expressions; use datafusion_expr::{ColumnarValue as DFColValue, Expr as DfExpr}; - use datafusion_physical_expr::math_expressions; use datatypes::arrow::array::{ArrayRef, Int64Array, NullArray}; use datatypes::arrow::error::ArrowError; use datatypes::arrow::{self, compute}; @@ -548,8 +542,10 @@ pub(crate) mod greptime_builtin { #[pyfunction] fn round(val: PyObjectRef, vm: &VirtualMachine) -> PyResult { let value = try_into_columnar_value(val, vm)?; - let array = value.into_array(1); - let result = math_expressions::round(&[array]).map_err(|e| from_df_err(e, vm))?; + let result = datafusion_functions::math::round() + .invoke(&[value]) + .and_then(|x| x.into_array(1)) + .map_err(|e| from_df_err(e, vm))?; try_into_py_obj(DFColValue::Array(result), vm) } @@ -604,7 +600,9 @@ pub(crate) mod greptime_builtin { // more info at: https://doc.rust-lang.org/reference/procedural-macros.html#procedural-macro-hygiene let arg = NullArray::new(len); let args = &[DFColValue::Array(std::sync::Arc::new(arg) as _)]; - let res = math_expressions::random(args).map_err(|err| from_df_err(err, vm))?; + let res = datafusion_functions::math::random() + .invoke(args) + .map_err(|err| from_df_err(err, vm))?; let ret = try_into_py_obj(res, vm)?; Ok(ret) } @@ -673,13 +671,16 @@ pub(crate) mod greptime_builtin { /// effectively equals to `list(vector)` #[pyfunction] fn array_agg(values: PyVectorRef, vm: &VirtualMachine) -> PyResult { - bind_aggr_fn!( - ArrayAgg, - vm, + eval_aggr_fn( + expressions::ArrayAgg::new( + Arc::new(expressions::Column::new("expr0", 0)) as _, + "ArrayAgg", + values.arrow_data_type(), + false, + ), &[values.to_arrow_array()], - values.arrow_data_type(), - expr0 - ); + vm, + ) } /// directly port from datafusion's `avg` function diff --git a/src/script/src/python/rspython/builtins/test.rs b/src/script/src/python/rspython/builtins/test.rs index 0245cd3e9d..3a5ea2115b 100644 --- a/src/script/src/python/rspython/builtins/test.rs +++ b/src/script/src/python/rspython/builtins/test.rs @@ -18,6 +18,7 @@ use std::io::Read; use std::path::Path; use std::sync::Arc; +use arrow::array::Array; use common_telemetry::{error, info}; use datatypes::arrow::array::{Float64Array, Int64Array}; use datatypes::arrow::compute; @@ -68,18 +69,18 @@ fn convert_scalar_to_py_obj_and_back() { } else { panic!("Convert errors, expect 1") } - let col = DFColValue::Scalar(ScalarValue::List( - Some(vec![ - ScalarValue::Int64(Some(1)), - ScalarValue::Int64(Some(2)), - ]), - Arc::new(Field::new("item", ArrowDataType::Int64, false)), - )); + let col = DFColValue::Scalar(ScalarValue::List(ScalarValue::new_list( + &[ScalarValue::Int64(Some(1)), ScalarValue::Int64(Some(2))], + &ArrowDataType::Int64, + ))); let to = try_into_py_obj(col, vm).unwrap(); let back = try_into_columnar_value(to, vm).unwrap(); - if let DFColValue::Scalar(ScalarValue::List(Some(list), field)) = back { - 
assert_eq!(list.len(), 2); - assert_eq!(*field.data_type(), ArrowDataType::Int64); + if let DFColValue::Scalar(ScalarValue::List(list)) = back { + assert_eq!(list.len(), 1); + assert_eq!( + list.data_type(), + &ArrowDataType::List(Arc::new(Field::new_list_field(ArrowDataType::Int64, true))) + ); } let list: Vec = vec![vm.ctx.new_int(1).into(), vm.ctx.new_int(2).into()]; let nested_list: Vec = diff --git a/src/script/src/python/rspython/builtins/testcases.ron b/src/script/src/python/rspython/builtins/testcases.ron index 8e637c6ed8..b5882048e8 100644 --- a/src/script/src/python/rspython/builtins/testcases.ron +++ b/src/script/src/python/rspython/builtins/testcases.ron @@ -707,7 +707,7 @@ from greptime import * sin(num)"#, expect: Ok(( ty: Float64, - value: Float(0.8414709848078965) + value: FloatVec([0.8414709848078965]) )) ), TestCase( @@ -722,7 +722,7 @@ from greptime import * sin(num)"#, expect: Ok(( ty: Float64, - value: Float(0.8414709848078965) + value: FloatVec([0.8414709848078965]) )) ), TestCase( @@ -732,7 +732,7 @@ from greptime import * sin(True)"#, expect: Ok(( ty: Float64, - value: Float(0.8414709848078965) + value: FloatVec([0.8414709848078965]) )) ), TestCase( @@ -747,7 +747,7 @@ from greptime import * sin(num)"#, expect: Ok(( ty: Float64, - value: Float(0.0) + value: FloatVec([0.0]) )) ), // test if string returns error correctly diff --git a/src/script/src/python/rspython/utils.rs b/src/script/src/python/rspython/utils.rs index e24b3005c3..ccaf5454fc 100644 --- a/src/script/src/python/rspython/utils.rs +++ b/src/script/src/python/rspython/utils.rs @@ -14,12 +14,13 @@ use std::sync::Arc; +use arrow::array::ArrayRef; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue as DFColValue; use datatypes::prelude::ScalarVector; use datatypes::value::Value; use datatypes::vectors::{ - BooleanVector, Float64Vector, Helper, Int64Vector, NullVector, StringVector, VectorRef, + BooleanVector, Float64Vector, Helper, Int64Vector, StringVector, VectorRef, }; use rustpython_vm::builtins::{PyBaseExceptionRef, PyBool, PyFloat, PyInt, PyList, PyStr}; use rustpython_vm::object::PyObjectPayload; @@ -134,15 +135,9 @@ pub fn py_obj_to_vec( try_into_columnar_value(obj.clone(), vm).map_err(|e| format_py_error(e, vm))?; match columnar_value { - DFColValue::Scalar(ScalarValue::List(scalars, _datatype)) => match scalars { - Some(scalars) => { - let array = - ScalarValue::iter_to_array(scalars).context(error::DataFusionSnafu)?; - - Helper::try_into_vector(array).context(error::TypeCastSnafu) - } - None => Ok(Arc::new(NullVector::new(0))), - }, + DFColValue::Scalar(ScalarValue::List(array)) => { + Helper::try_into_vector(array as ArrayRef).context(error::TypeCastSnafu) + } _ => unreachable!(), } } else { diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index b965a3dc3f..ef8a2f751d 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -99,7 +99,7 @@ tokio.workspace = true tokio-rustls = "0.25" tokio-stream = { workspace = true, features = ["net"] } tonic.workspace = true -tonic-reflection = "0.10" +tonic-reflection = "0.11" tower = { version = "0.4", features = ["full"] } tower-http = { version = "0.4", features = ["full"] } urlencoding = "2.1" diff --git a/src/servers/src/grpc/authorize.rs b/src/servers/src/grpc/authorize.rs index ae003640ea..4b0c986912 100644 --- a/src/servers/src/grpc/authorize.rs +++ b/src/servers/src/grpc/authorize.rs @@ -20,7 +20,7 @@ use auth::UserProviderRef; use hyper::Body; use session::context::QueryContext; use tonic::body::BoxBody; 
-use tonic::transport::NamedService; +use tonic::server::NamedService; use tower::{Layer, Service}; use crate::http::authorize::{extract_catalog_and_schema, extract_username_and_password}; diff --git a/src/servers/src/grpc/flight.rs b/src/servers/src/grpc/flight.rs index 9ed2ed85d3..cd1d2a4bd0 100644 --- a/src/servers/src/grpc/flight.rs +++ b/src/servers/src/grpc/flight.rs @@ -21,7 +21,7 @@ use api::v1::GreptimeRequest; use arrow_flight::flight_service_server::FlightService; use arrow_flight::{ Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, - HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, + HandshakeRequest, HandshakeResponse, PollInfo, PutResult, SchemaResult, Ticket, }; use async_trait::async_trait; use common_grpc::flight::{FlightEncoder, FlightMessage}; @@ -96,6 +96,13 @@ impl FlightService for FlightCraftWrapper { Err(Status::unimplemented("Not yet implemented")) } + async fn poll_flight_info( + &self, + _: Request, + ) -> TonicResult> { + Err(Status::unimplemented("Not yet implemented")) + } + async fn get_schema( &self, _: Request, diff --git a/src/servers/src/mysql/helper.rs b/src/servers/src/mysql/helper.rs index 6109ec706d..c5d509016d 100644 --- a/src/servers/src/mysql/helper.rs +++ b/src/servers/src/mysql/helper.rs @@ -179,6 +179,7 @@ pub fn convert_value(param: &ParamValue, t: &ConcreteDataType) -> Result(idx, &client_type)?; match server_type { ConcreteDataType::Timestamp(unit) => match *unit { - TimestampType::Second(_) => { - ScalarValue::TimestampSecond(data.map(|ts| ts.timestamp()), None) - } + TimestampType::Second(_) => ScalarValue::TimestampSecond( + data.map(|ts| ts.and_utc().timestamp()), + None, + ), TimestampType::Millisecond(_) => ScalarValue::TimestampMillisecond( - data.map(|ts| ts.timestamp_millis()), + data.map(|ts| ts.and_utc().timestamp_millis()), None, ), TimestampType::Microsecond(_) => ScalarValue::TimestampMicrosecond( - data.map(|ts| ts.timestamp_micros()), + data.map(|ts| ts.and_utc().timestamp_micros()), None, ), TimestampType::Nanosecond(_) => ScalarValue::TimestampNanosecond( - data.map(|ts| ts.timestamp_micros()), + data.map(|ts| ts.and_utc().timestamp_micros()), None, ), }, ConcreteDataType::DateTime(_) => { - ScalarValue::Date64(data.map(|d| d.timestamp_millis())) + ScalarValue::Date64(data.map(|d| d.and_utc().timestamp_millis())) } _ => { return Err(invalid_parameter_error( @@ -814,10 +815,7 @@ mod test { let err = encode_value( &query_context, - &Value::List(ListValue::new( - Some(Box::default()), - ConcreteDataType::int16_datatype(), - )), + &Value::List(ListValue::new(vec![], ConcreteDataType::int16_datatype())), &mut builder, ) .unwrap_err(); diff --git a/src/servers/src/prom_store.rs b/src/servers/src/prom_store.rs index 23c7e45c38..515a28b44b 100644 --- a/src/servers/src/prom_store.rs +++ b/src/servers/src/prom_store.rs @@ -103,11 +103,11 @@ pub fn query_to_plan(dataframe: DataFrame, q: &Query) -> Result { } // Case sensitive regexp match MatcherType::Re => { - conditions.push(regexp_match(vec![col(name), lit(value)]).is_not_null()); + conditions.push(regexp_match(col(name), lit(value)).is_not_null()); } // Case sensitive regexp not match MatcherType::Nre => { - conditions.push(regexp_match(vec![col(name), lit(value)]).is_null()); + conditions.push(regexp_match(col(name), lit(value)).is_null()); } } } diff --git a/src/session/src/context.rs b/src/session/src/context.rs index 25091e1cf5..d2c060919e 100644 --- a/src/session/src/context.rs +++ b/src/session/src/context.rs @@ -22,7 +22,7 @@ 
use arc_swap::ArcSwap; use auth::UserInfoRef; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_catalog::{build_db_string, parse_catalog_and_schema_from_db_string}; -use common_time::timezone::get_timezone; +use common_time::timezone::{get_timezone, parse_timezone}; use common_time::Timezone; use derive_builder::Builder; use sql::dialect::{Dialect, GreptimeDbDialect, MySqlDialect, PostgreSqlDialect}; @@ -89,8 +89,7 @@ impl From<&RegionRequestHeader> for QueryContext { current_catalog: catalog.to_string(), current_schema: schema.to_string(), current_user: Default::default(), - // for request send to datanode, all timestamp have converted to UTC, so timezone is not important - timezone: ArcSwap::new(Arc::new(get_timezone(None).clone())), + timezone: ArcSwap::new(Arc::new(parse_timezone(Some(&value.timezone)))), sql_dialect: Arc::new(GreptimeDbDialect {}), extension: Default::default(), configuration_parameter: Default::default(), diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 9010106d85..e873343bbd 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -83,14 +83,14 @@ impl<'a> ParserContext<'a> { } pub(crate) fn intern_parse_table_name(&mut self) -> Result { - let raw_table_name = self - .parser - .parse_object_name() - .context(error::UnexpectedSnafu { - sql: self.sql, - expected: "a table name", - actual: self.parser.peek_token().to_string(), - })?; + let raw_table_name = + self.parser + .parse_object_name(false) + .context(error::UnexpectedSnafu { + sql: self.sql, + expected: "a table name", + actual: self.parser.peek_token().to_string(), + })?; Ok(Self::canonicalize_object_name(raw_table_name)) } @@ -100,7 +100,7 @@ impl<'a> ParserContext<'a> { .try_with_sql(sql) .context(SyntaxSnafu)?; - let function_name = parser.parse_identifier().context(SyntaxSnafu)?; + let function_name = parser.parse_identifier(false).context(SyntaxSnafu)?; parser .parse_function(ObjectName(vec![function_name])) .context(SyntaxSnafu) @@ -222,6 +222,22 @@ impl<'a> ParserContext<'a> { .collect(), ) } + + /// Simply a shortcut for sqlparser's method of the same name, `parse_object_name`, + /// but with the argument fixed to "false". + /// Because that argument (introduced by BigQuery) is always "false" for us, + /// we don't want to write it again and again. + pub(crate) fn parse_object_name(&mut self) -> std::result::Result { + self.parser.parse_object_name(false) + } + + /// Simply a shortcut for sqlparser's method of the same name, `parse_identifier`, + /// but with the argument fixed to "false". + /// Because that argument (introduced by BigQuery) is always "false" for us, + /// we don't want to write it again and again.
+ pub(crate) fn parse_identifier(&mut self) -> std::result::Result { + self.parser.parse_identifier(false) + } } #[cfg(test)] diff --git a/src/sql/src/parsers/alter_parser.rs b/src/sql/src/parsers/alter_parser.rs index 483c432636..687604e370 100644 --- a/src/sql/src/parsers/alter_parser.rs +++ b/src/sql/src/parsers/alter_parser.rs @@ -33,7 +33,7 @@ impl<'a> ParserContext<'a> { let parser = &mut self.parser; parser.expect_keywords(&[Keyword::ALTER, Keyword::TABLE])?; - let raw_table_name = parser.parse_object_name()?; + let raw_table_name = parser.parse_object_name(false)?; let table_name = Self::canonicalize_object_name(raw_table_name); let alter_operation = if parser.parse_keyword(Keyword::ADD) { @@ -48,7 +48,7 @@ impl<'a> ParserContext<'a> { } else if let Token::Word(word) = parser.peek_token().token { if word.value.to_ascii_uppercase() == "AFTER" { let _ = parser.next_token(); - let name = Self::canonicalize_identifier(parser.parse_identifier()?); + let name = Self::canonicalize_identifier(self.parse_identifier()?); Some(AddColumnLocation::After { column_name: name.value, }) @@ -65,7 +65,7 @@ impl<'a> ParserContext<'a> { } } else if parser.parse_keyword(Keyword::DROP) { if parser.parse_keyword(Keyword::COLUMN) { - let name = Self::canonicalize_identifier(self.parser.parse_identifier()?); + let name = Self::canonicalize_identifier(self.parse_identifier()?); AlterTableOperation::DropColumn { name } } else { return Err(ParserError::ParserError(format!( @@ -74,7 +74,7 @@ impl<'a> ParserContext<'a> { ))); } } else if parser.parse_keyword(Keyword::RENAME) { - let new_table_name_obj_raw = parser.parse_object_name()?; + let new_table_name_obj_raw = self.parse_object_name()?; let new_table_name_obj = Self::canonicalize_object_name(new_table_name_obj_raw); let new_table_name = match &new_table_name_obj.0[..] 
{ [table] => table.value.clone(), @@ -128,7 +128,7 @@ mod tests { location, } => { assert_eq!("tagk_i", column_def.name.value); - assert_eq!(DataType::String, column_def.data_type); + assert_eq!(DataType::String(None), column_def.data_type); assert!(column_def .options .iter() @@ -164,7 +164,7 @@ mod tests { location, } => { assert_eq!("tagk_i", column_def.name.value); - assert_eq!(DataType::String, column_def.data_type); + assert_eq!(DataType::String(None), column_def.data_type); assert!(column_def .options .iter() @@ -200,7 +200,7 @@ mod tests { location, } => { assert_eq!("tagk_i", column_def.name.value); - assert_eq!(DataType::String, column_def.data_type); + assert_eq!(DataType::String(None), column_def.data_type); assert!(column_def .options .iter() diff --git a/src/sql/src/parsers/copy_parser.rs b/src/sql/src/parsers/copy_parser.rs index bd365e51ef..be4b187644 100644 --- a/src/sql/src/parsers/copy_parser.rs +++ b/src/sql/src/parsers/copy_parser.rs @@ -47,14 +47,13 @@ impl<'a> ParserContext<'a> { } fn parser_copy_database(&mut self) -> Result { - let database_name = - self.parser - .parse_object_name() - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a database name", - actual: self.peek_token_as_string(), - })?; + let database_name = self + .parse_object_name() + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a database name", + actual: self.peek_token_as_string(), + })?; let req = if self.parser.parse_keyword(Keyword::TO) { let (with, connection, location) = self.parse_copy_parameters()?; @@ -82,14 +81,13 @@ impl<'a> ParserContext<'a> { } fn parse_copy_table(&mut self) -> Result { - let raw_table_name = - self.parser - .parse_object_name() - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a table name", - actual: self.peek_token_as_string(), - })?; + let raw_table_name = self + .parse_object_name() + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a table name", + actual: self.peek_token_as_string(), + })?; let table_name = Self::canonicalize_object_name(raw_table_name); if self.parser.parse_keyword(Keyword::TO) { diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index ec61a20d53..08764aaa6e 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -111,14 +111,11 @@ impl<'a> ParserContext<'a> { self.parser .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let database_name = self - .parser - .parse_object_name() - .context(error::UnexpectedSnafu { - sql: self.sql, - expected: "a database name", - actual: self.peek_token_as_string(), - })?; + let database_name = self.parse_object_name().context(error::UnexpectedSnafu { + sql: self.sql, + expected: "a database name", + actual: self.peek_token_as_string(), + })?; let database_name = Self::canonicalize_object_name(database_name); Ok(Statement::CreateDatabase(CreateDatabase { name: database_name, @@ -319,6 +316,7 @@ impl<'a> ParserContext<'a> { quote_style: None, }], is_primary: false, + characteristics: None, }; constraints.push(constraint); } @@ -367,7 +365,7 @@ impl<'a> ParserContext<'a> { pub fn parse_column_def(&mut self) -> std::result::Result { let parser = &mut self.parser; - let name = parser.parse_identifier()?; + let name = parser.parse_identifier(false)?; if name.quote_style.is_none() && // "ALL_KEYWORDS" are sorted. 
ALL_KEYWORDS.binary_search(&name.value.to_uppercase().as_str()).is_ok() @@ -380,14 +378,14 @@ impl<'a> ParserContext<'a> { let data_type = parser.parse_data_type()?; let collation = if parser.parse_keyword(Keyword::COLLATE) { - Some(parser.parse_object_name()?) + Some(parser.parse_object_name(false)?) } else { None }; let mut options = vec![]; loop { if parser.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(parser.parse_identifier()?); + let name = Some(parser.parse_identifier(false)?); if let Some(option) = Self::parse_optional_column_option(parser)? { options.push(ColumnOptionDef { name, option }); } else { @@ -415,7 +413,7 @@ impl<'a> ParserContext<'a> { ) -> std::result::Result, ParserError> { if parser.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { Ok(Some(ColumnOption::CharacterSet( - parser.parse_object_name()?, + parser.parse_object_name(false)?, ))) } else if parser.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { Ok(Some(ColumnOption::NotNull)) @@ -432,9 +430,15 @@ impl<'a> ParserContext<'a> { } else if parser.parse_keyword(Keyword::DEFAULT) { Ok(Some(ColumnOption::Default(parser.parse_expr()?))) } else if parser.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { - Ok(Some(ColumnOption::Unique { is_primary: true })) + Ok(Some(ColumnOption::Unique { + is_primary: true, + characteristics: None, + })) } else if parser.parse_keyword(Keyword::UNIQUE) { - Ok(Some(ColumnOption::Unique { is_primary: false })) + Ok(Some(ColumnOption::Unique { + is_primary: false, + characteristics: None, + })) } else if parser.parse_keywords(&[Keyword::TIME, Keyword::INDEX]) { // Use a DialectSpecific option for time index Ok(Some(ColumnOption::DialectSpecific(vec![ @@ -456,7 +460,7 @@ impl<'a> ParserContext<'a> { fn parse_optional_table_constraint(&mut self) -> Result> { let name = if self.parser.parse_keyword(Keyword::CONSTRAINT) { - let raw_name = self.parser.parse_identifier().context(error::SyntaxSnafu)?; + let raw_name = self.parse_identifier().context(SyntaxSnafu)?; Some(Self::canonicalize_identifier(raw_name)) } else { None @@ -485,6 +489,7 @@ impl<'a> ParserContext<'a> { name, columns, is_primary: true, + characteristics: None, })) } TokenWithLocation { @@ -524,6 +529,7 @@ impl<'a> ParserContext<'a> { }), columns, is_primary: false, + characteristics: None, })) } unexpected => { @@ -568,6 +574,7 @@ fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) - name: Some(ident), columns, is_primary: false, + .. } = c { if ident.value == TIME_INDEX { @@ -857,7 +864,7 @@ mod tests { assert_column_def(&columns[0], "host", "STRING"); assert_column_def(&columns[1], "ts", "TIMESTAMP"); assert_column_def(&columns[2], "cpu", "FLOAT"); - assert_column_def(&columns[3], "memory", "DOUBLE"); + assert_column_def(&columns[3], "memory", "FLOAT64"); let constraints = &c.constraints; assert_matches!( @@ -1108,6 +1115,7 @@ ENGINE=mito"; name, columns, is_primary, + .. } => { assert_eq!(name.unwrap().to_string(), "__time_index"); assert_eq!(columns.len(), 1); @@ -1314,6 +1322,7 @@ ENGINE=mito"; name, columns, is_primary, + .. 
} => { assert_eq!(name.unwrap().to_string(), "__time_index"); assert_eq!(columns.len(), 1); @@ -1422,7 +1431,7 @@ ENGINE=mito"; assert_column_def(&columns[0], "host", "STRING"); assert_column_def(&columns[1], "ts", "TIMESTAMP"); assert_column_def(&columns[2], "cpu", "FLOAT"); - assert_column_def(&columns[3], "memory", "DOUBLE"); + assert_column_def(&columns[3], "memory", "FLOAT64"); let constraints = &c.constraints; assert_matches!( diff --git a/src/sql/src/parsers/describe_parser.rs b/src/sql/src/parsers/describe_parser.rs index 7ba1ffc003..9525b3edb2 100644 --- a/src/sql/src/parsers/describe_parser.rs +++ b/src/sql/src/parsers/describe_parser.rs @@ -31,8 +31,7 @@ impl<'a> ParserContext<'a> { fn parse_describe_table(&mut self) -> Result { let raw_table_idents = - self.parser - .parse_object_name() + self.parse_object_name() .with_context(|_| error::UnexpectedSnafu { sql: self.sql, expected: "a table name", diff --git a/src/sql/src/parsers/drop_parser.rs b/src/sql/src/parsers/drop_parser.rs index d5d872ee16..8cc62189d8 100644 --- a/src/sql/src/parsers/drop_parser.rs +++ b/src/sql/src/parsers/drop_parser.rs @@ -40,8 +40,7 @@ impl<'a> ParserContext<'a> { let if_exists = self.parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let raw_table_ident = - self.parser - .parse_object_name() + self.parse_object_name() .with_context(|_| error::UnexpectedSnafu { sql: self.sql, expected: "a table name", @@ -62,14 +61,13 @@ impl<'a> ParserContext<'a> { let _ = self.parser.next_token(); let if_exists = self.parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let database_name = - self.parser - .parse_object_name() - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a database name", - actual: self.peek_token_as_string(), - })?; + let database_name = self + .parse_object_name() + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a database name", + actual: self.peek_token_as_string(), + })?; let database_name = Self::canonicalize_object_name(database_name); Ok(Statement::DropDatabase(DropDatabase::new( diff --git a/src/sql/src/parsers/explain_parser.rs b/src/sql/src/parsers/explain_parser.rs index 906bd792ec..57349220fd 100644 --- a/src/sql/src/parsers/explain_parser.rs +++ b/src/sql/src/parsers/explain_parser.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use snafu::ResultExt; +use sqlparser::ast::DescribeAlias; use crate::error::{self, Result}; use crate::parser::ParserContext; @@ -22,14 +23,14 @@ use crate::statements::statement::Statement; /// EXPLAIN statement parser implementation impl<'a> ParserContext<'a> { pub(crate) fn parse_explain(&mut self) -> Result { - let explain_statement = - self.parser - .parse_explain(false) - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a query statement", - actual: self.peek_token_as_string(), - })?; + let explain_statement = self + .parser + .parse_explain(DescribeAlias::Explain) + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a query statement", + actual: self.peek_token_as_string(), + })?; Ok(Statement::Explain(Explain::try_from(explain_statement)?)) } @@ -80,6 +81,7 @@ mod tests { having: None, qualify: None, named_window: vec![], + value_table_mode: None, }; let sp_statement = SpStatement::Query(Box::new(SpQuery { @@ -87,13 +89,15 @@ mod tests { body: Box::new(sqlparser::ast::SetExpr::Select(Box::new(select))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], + for_clause: None, })); let explain = Explain::try_from(SpStatement::Explain { - describe_alias: false, + describe_alias: DescribeAlias::Explain, analyze: false, verbose: false, statement: Box::new(sp_statement), diff --git a/src/sql/src/parsers/show_parser.rs b/src/sql/src/parsers/show_parser.rs index fb04309f65..afe2f06b71 100644 --- a/src/sql/src/parsers/show_parser.rs +++ b/src/sql/src/parsers/show_parser.rs @@ -59,14 +59,13 @@ impl<'a> ParserContext<'a> { self.unsupported(self.peek_token_as_string()) } } else if self.consume_token("VARIABLES") { - let variable = - self.parser - .parse_object_name() - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a variable name", - actual: self.peek_token_as_string(), - })?; + let variable = self + .parse_object_name() + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a variable name", + actual: self.peek_token_as_string(), + })?; Ok(Statement::ShowVariables(ShowVariables { variable })) } else { self.unsupported(self.peek_token_as_string()) @@ -75,14 +74,13 @@ impl<'a> ParserContext<'a> { /// Parse SHOW CREATE TABLE statement fn parse_show_create_table(&mut self) -> Result { - let raw_table_name = - self.parser - .parse_object_name() - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a table name", - actual: self.peek_token_as_string(), - })?; + let raw_table_name = self + .parse_object_name() + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a table name", + actual: self.peek_token_as_string(), + })?; let table_name = Self::canonicalize_object_name(raw_table_name); ensure!( !table_name.0.is_empty(), @@ -95,14 +93,13 @@ impl<'a> ParserContext<'a> { fn parse_show_table_name(&mut self) -> Result { let _ = self.parser.next_token(); - let table_name = - self.parser - .parse_object_name() - .with_context(|_| error::UnexpectedSnafu { - sql: self.sql, - expected: "a table name", - actual: self.peek_token_as_string(), - })?; + let table_name = self + .parse_object_name() + .with_context(|_| error::UnexpectedSnafu { + sql: self.sql, + expected: "a table name", + actual: self.peek_token_as_string(), + })?; ensure!( table_name.0.len() == 1, @@ -120,7 +117,6 @@ impl<'a> ParserContext<'a> { fn parse_db_name(&mut self) -> Result> { let _ = self.parser.next_token(); let db_name = self - .parser .parse_object_name() 
.with_context(|_| error::UnexpectedSnafu { sql: self.sql, @@ -182,7 +178,7 @@ impl<'a> ParserContext<'a> { Token::Word(w) => match w.keyword { Keyword::LIKE => { let _ = self.parser.next_token(); - ShowKind::Like(self.parser.parse_identifier().with_context(|_| { + ShowKind::Like(self.parse_identifier().with_context(|_| { error::UnexpectedSnafu { sql: self.sql, expected: "LIKE", @@ -298,7 +294,7 @@ impl<'a> ParserContext<'a> { Token::Word(w) => match w.keyword { Keyword::LIKE => { let _ = self.parser.next_token(); - ShowKind::Like(self.parser.parse_identifier().with_context(|_| { + ShowKind::Like(self.parse_identifier().with_context(|_| { error::UnexpectedSnafu { sql: self.sql, expected: "LIKE", @@ -337,7 +333,7 @@ impl<'a> ParserContext<'a> { } Token::Word(w) => match w.keyword { Keyword::LIKE => Ok(Statement::ShowDatabases(ShowDatabases::new( - ShowKind::Like(self.parser.parse_identifier().with_context(|_| { + ShowKind::Like(self.parse_identifier().with_context(|_| { error::UnexpectedSnafu { sql: self.sql, expected: "LIKE", diff --git a/src/sql/src/parsers/tql_parser.rs b/src/sql/src/parsers/tql_parser.rs index a681ca1001..8ce89812f1 100644 --- a/src/sql/src/parsers/tql_parser.rs +++ b/src/sql/src/parsers/tql_parser.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use chrono::Utc; +use datafusion::execution::context::SessionState; use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; use datafusion_common::config::ConfigOptions; use datafusion_common::{DFSchema, Result as DFResult, ScalarValue, TableReference}; @@ -205,7 +206,7 @@ impl<'a> ParserContext<'a> { fn parse_to_logical_expr(expr: sqlparser::ast::Expr) -> std::result::Result { let empty_df_schema = DFSchema::empty(); - SqlToRel::new(&StubContextProvider {}) + SqlToRel::new(&StubContextProvider::default()) .sql_to_expr(expr.into(), &empty_df_schema, &mut Default::default()) .context(ConvertToLogicalExpressionSnafu) } @@ -262,20 +263,29 @@ impl<'a> ParserContext<'a> { } } -#[derive(Default)] -struct StubContextProvider {} +struct StubContextProvider { + state: SessionState, +} + +impl Default for StubContextProvider { + fn default() -> Self { + Self { + state: SessionState::new_with_config_rt(Default::default(), Default::default()), + } + } +} impl ContextProvider for StubContextProvider { - fn get_table_provider(&self, _name: TableReference) -> DFResult> { + fn get_table_source(&self, _name: TableReference) -> DFResult> { unimplemented!() } - fn get_function_meta(&self, _name: &str) -> Option> { - None + fn get_function_meta(&self, name: &str) -> Option> { + self.state.scalar_functions().get(name).cloned() } - fn get_aggregate_meta(&self, _name: &str) -> Option> { - unimplemented!() + fn get_aggregate_meta(&self, name: &str) -> Option> { + self.state.aggregate_functions().get(name).cloned() } fn get_window_meta(&self, _name: &str) -> Option> { @@ -289,6 +299,18 @@ impl ContextProvider for StubContextProvider { fn options(&self) -> &ConfigOptions { unimplemented!() } + + fn udfs_names(&self) -> Vec { + self.state.scalar_functions().keys().cloned().collect() + } + + fn udafs_names(&self) -> Vec { + self.state.aggregate_functions().keys().cloned().collect() + } + + fn udwfs_names(&self) -> Vec { + self.state.window_functions().keys().cloned().collect() + } } #[cfg(test)] diff --git a/src/sql/src/parsers/truncate_parser.rs b/src/sql/src/parsers/truncate_parser.rs index d4ac0c4d14..b71c85a5e8 100644 --- a/src/sql/src/parsers/truncate_parser.rs +++ b/src/sql/src/parsers/truncate_parser.rs @@ -27,8 +27,7 @@ impl<'a> 
ParserContext<'a> { let _ = self.parser.parse_keyword(Keyword::TABLE); let raw_table_ident = - self.parser - .parse_object_name() + self.parse_object_name() .with_context(|_| error::UnexpectedSnafu { sql: self.sql, expected: "a table name", diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index d51378c098..de35b71a90 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -345,7 +345,7 @@ pub fn has_primary_key_option(column_def: &ColumnDef) -> bool { .options .iter() .any(|options| match options.option { - ColumnOption::Unique { is_primary } => is_primary, + ColumnOption::Unique { is_primary, .. } => is_primary, _ => false, }) } @@ -414,10 +414,15 @@ pub fn sql_column_def_to_grpc_column_def( .context(ConvertToGrpcDataTypeSnafu)? .to_parts(); - let is_primary_key = col - .options - .iter() - .any(|o| matches!(o.option, ColumnOption::Unique { is_primary: true })); + let is_primary_key = col.options.iter().any(|o| { + matches!( + o.option, + ColumnOption::Unique { + is_primary: true, + .. + } + ) + }); let semantic_type = if is_primary_key { SemanticType::Tag @@ -438,7 +443,7 @@ pub fn sql_column_def_to_grpc_column_def( pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result { match data_type { - SqlDataType::BigInt(_) => Ok(ConcreteDataType::int64_datatype()), + SqlDataType::BigInt(_) | SqlDataType::Int64 => Ok(ConcreteDataType::int64_datatype()), SqlDataType::UnsignedBigInt(_) => Ok(ConcreteDataType::uint64_datatype()), SqlDataType::Int(_) | SqlDataType::Integer(_) => Ok(ConcreteDataType::int32_datatype()), SqlDataType::UnsignedInt(_) | SqlDataType::UnsignedInteger(_) => { @@ -453,9 +458,9 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result Ok(ConcreteDataType::string_datatype()), + | SqlDataType::String(_) => Ok(ConcreteDataType::string_datatype()), SqlDataType::Float(_) => Ok(ConcreteDataType::float32_datatype()), - SqlDataType::Double => Ok(ConcreteDataType::float64_datatype()), + SqlDataType::Double | SqlDataType::Float64 => Ok(ConcreteDataType::float64_datatype()), SqlDataType::Boolean => Ok(ConcreteDataType::boolean_datatype()), SqlDataType::Date => Ok(ConcreteDataType::date_datatype()), SqlDataType::Binary(_) @@ -502,7 +507,7 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu ConcreteDataType::UInt16(_) => Ok(SqlDataType::UnsignedSmallInt(None)), ConcreteDataType::Int8(_) => Ok(SqlDataType::TinyInt(None)), ConcreteDataType::UInt8(_) => Ok(SqlDataType::UnsignedTinyInt(None)), - ConcreteDataType::String(_) => Ok(SqlDataType::String), + ConcreteDataType::String(_) => Ok(SqlDataType::String(None)), ConcreteDataType::Float32(_) => Ok(SqlDataType::Float(None)), ConcreteDataType::Float64(_) => Ok(SqlDataType::Double), ConcreteDataType::Boolean(_) => Ok(SqlDataType::Boolean), @@ -588,7 +593,10 @@ mod tests { ConcreteDataType::string_datatype(), ); check_type(SqlDataType::Text, ConcreteDataType::string_datatype()); - check_type(SqlDataType::String, ConcreteDataType::string_datatype()); + check_type( + SqlDataType::String(None), + ConcreteDataType::string_datatype(), + ); check_type( SqlDataType::Float(None), ConcreteDataType::float32_datatype(), @@ -966,7 +974,10 @@ mod tests { collation: None, options: vec![ColumnOptionDef { name: None, - option: ColumnOption::Unique { is_primary: true }, + option: ColumnOption::Unique { + is_primary: true, + characteristics: None, + }, }], }; @@ -1044,7 +1055,10 @@ mod tests { collation: None, options: vec![ColumnOptionDef { name: None, - 
option: ColumnOption::Unique { is_primary: true }, + option: ColumnOption::Unique { + is_primary: true, + characteristics: None, + }, }], }; assert!(has_primary_key_option(&column_def)); @@ -1081,7 +1095,7 @@ mod tests { let column_def = ColumnDef { name: "col2".into(), - data_type: SqlDataType::String, + data_type: SqlDataType::String(None), collation: None, options: vec![ ColumnOptionDef { diff --git a/src/sql/src/statements/insert.rs b/src/sql/src/statements/insert.rs index 81ef55734d..4eae7f1e18 100644 --- a/src/sql/src/statements/insert.rs +++ b/src/sql/src/statements/insert.rs @@ -55,10 +55,10 @@ impl Insert { match &self.inner { Statement::Insert { source: - box Query { + Some(box Query { body: box SetExpr::Values(Values { rows, .. }), .. - }, + }), .. } => sql_exprs_to_values(rows), _ => unreachable!(), @@ -71,10 +71,10 @@ impl Insert { match &self.inner { Statement::Insert { source: - box Query { + Some(box Query { body: box SetExpr::Values(Values { rows, .. }), .. - }, + }), .. } => rows.iter().all(|es| { es.iter().all(|expr| match expr { @@ -100,7 +100,8 @@ impl Insert { pub fn query_body(&self) -> Result> { Ok(match &self.inner { Statement::Insert { - source: box query, .. + source: Some(box query), + .. } => Some(query.clone().try_into()?), _ => None, }) diff --git a/src/sql/src/statements/transform/type_alias.rs b/src/sql/src/statements/transform/type_alias.rs index be61f8baec..353c19f68c 100644 --- a/src/sql/src/statements/transform/type_alias.rs +++ b/src/sql/src/statements/transform/type_alias.rs @@ -58,28 +58,56 @@ impl TransformRule for TypeAliasTransformRule { } fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> { + fn cast_expr_to_arrow_cast_func(expr: Expr, cast_type: String) -> Function { + Function { + name: ObjectName(vec![Ident::new("arrow_cast")]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString(cast_type), + ))), + ], + filter: None, + null_treatment: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + } + } + match expr { + // In new sqlparser, the "INT64" is no longer parsed to custom datatype. + // The new "Int64" is not recognizable by Datafusion, cannot directly "CAST" to it. + // We have to replace the expr to "arrow_cast" function call here. + // Same for "FLOAT64". + Expr::Cast { + expr: cast_expr, + data_type, + .. + } if matches!(data_type, DataType::Int64 | DataType::Float64) => { + if let Some(new_type) = get_data_type_by_alias_name(&data_type.to_string()) { + if let Ok(new_type) = sql_data_type_to_concrete_data_type(&new_type) { + *expr = Expr::Function(cast_expr_to_arrow_cast_func( + (**cast_expr).clone(), + new_type.as_arrow_type().to_string(), + )); + } + } + } + // Type alias Expr::Cast { data_type: DataType::Custom(name, tokens), expr: cast_expr, + .. 
} if name.0.len() == 1 && tokens.is_empty() => { if let Some(new_type) = get_data_type_by_alias_name(name.0[0].value.as_str()) { - if let Ok(concrete_type) = sql_data_type_to_concrete_data_type(&new_type) { - let new_type = concrete_type.as_arrow_type(); - *expr = Expr::Function(Function { - name: ObjectName(vec![Ident::new("arrow_cast")]), - args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr((**cast_expr).clone())), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - Value::SingleQuotedString(new_type.to_string()), - ))), - ], - over: None, - distinct: false, - special: false, - order_by: vec![], - }); + if let Ok(new_type) = sql_data_type_to_concrete_data_type(&new_type) { + *expr = Expr::Function(cast_expr_to_arrow_cast_func( + (**cast_expr).clone(), + new_type.as_arrow_type().to_string(), + )); } } } @@ -89,24 +117,16 @@ impl TransformRule for TypeAliasTransformRule { Expr::Cast { data_type: DataType::Timestamp(precision, zone), expr: cast_expr, + .. } => { if let Ok(concrete_type) = sql_data_type_to_concrete_data_type(&DataType::Timestamp(*precision, *zone)) { let new_type = concrete_type.as_arrow_type(); - *expr = Expr::Function(Function { - name: ObjectName(vec![Ident::new("arrow_cast")]), - args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr((**cast_expr).clone())), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - Value::SingleQuotedString(new_type.to_string()), - ))), - ], - over: None, - distinct: false, - special: false, - order_by: vec![], - }); + *expr = Expr::Function(cast_expr_to_arrow_cast_func( + (**cast_expr).clone(), + new_type.to_string(), + )); } } @@ -353,10 +373,10 @@ CREATE TABLE data_types ( tint INT8, sint SMALLINT, i INT, - bint BIGINT, + bint INT64, v VARCHAR, f FLOAT, - d DOUBLE, + d FLOAT64, b BOOLEAN, vb VARBINARY, dt DATE, diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs index 9a931e02cb..c9c1352252 100644 --- a/src/sql/src/util.rs +++ b/src/sql/src/util.rs @@ -17,7 +17,7 @@ use std::fmt::{Display, Formatter}; use std::sync::LazyLock; use regex::Regex; -use sqlparser::ast::{ObjectName, SqlOption, Value}; +use sqlparser::ast::{Expr, ObjectName, SqlOption, Value}; static SQL_SECRET_PATTERNS: LazyLock> = LazyLock::new(|| { vec![ @@ -47,9 +47,11 @@ pub fn format_raw_object_name(name: &ObjectName) -> String { format!("{}", Inner { name }) } -pub fn parse_option_string(value: Value) -> Option { +pub fn parse_option_string(value: Expr) -> Option { match value { - Value::SingleQuotedString(v) | Value::DoubleQuotedString(v) => Some(v), + Expr::Value(Value::SingleQuotedString(v)) | Expr::Value(Value::DoubleQuotedString(v)) => { + Some(v) + } _ => None, } } @@ -60,7 +62,9 @@ pub fn to_lowercase_options_map(opts: &[SqlOption]) -> HashMap { let mut map = HashMap::with_capacity(opts.len()); for SqlOption { name, value } in opts { let value_str = match value { - Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => s.clone(), + Expr::Value(Value::SingleQuotedString(s)) + | Expr::Value(Value::DoubleQuotedString(s)) => s.clone(), + Expr::Identifier(i) => i.value.clone(), _ => value.to_string(), }; let _ = map.insert(name.value.to_lowercase().clone(), value_str); diff --git a/src/table/src/predicate.rs b/src/table/src/predicate.rs index bd5d6d2d18..f704792626 100644 --- a/src/table/src/predicate.rs +++ b/src/table/src/predicate.rs @@ -89,8 +89,7 @@ impl Predicate { .exprs .iter() .filter_map(|expr| { - create_physical_expr(expr.df_expr(), df_schema.as_ref(), schema, execution_props) - .ok() + create_physical_expr(expr.df_expr(), 
df_schema.as_ref(), execution_props).ok() }) .collect::>()) } @@ -284,7 +283,11 @@ impl<'a> TimeRangePredicateBuilder<'a> { | Operator::BitwiseShiftLeft | Operator::StringConcat | Operator::ArrowAt - | Operator::AtArrow => None, + | Operator::AtArrow + | Operator::LikeMatch + | Operator::ILikeMatch + | Operator::NotLikeMatch + | Operator::NotILikeMatch => None, } } diff --git a/src/table/src/predicate/stats.rs b/src/table/src/predicate/stats.rs index 4d707a82e9..e1cc48c320 100644 --- a/src/table/src/predicate/stats.rs +++ b/src/table/src/predicate/stats.rs @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; use std::sync::Arc; use datafusion::parquet::file::metadata::RowGroupMetaData; use datafusion::parquet::file::statistics::Statistics as ParquetStats; use datafusion::physical_optimizer::pruning::PruningStatistics; use datafusion_common::{Column, ScalarValue}; -use datatypes::arrow::array::{ArrayRef, UInt64Array}; +use datatypes::arrow::array::{ArrayRef, BooleanArray, UInt64Array}; use datatypes::arrow::datatypes::DataType; use paste::paste; @@ -115,4 +116,14 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { } Some(Arc::new(UInt64Array::from(values))) } + + fn row_counts(&self, _column: &Column) -> Option { + // TODO(LFC): Impl it. + None + } + + fn contained(&self, _column: &Column, _values: &HashSet) -> Option { + // TODO(LFC): Impl it. + None + } } diff --git a/src/table/src/table/scan.rs b/src/table/src/table/scan.rs index 0d5b769136..4eb907063e 100644 --- a/src/table/src/table/scan.rs +++ b/src/table/src/table/scan.rs @@ -28,7 +28,8 @@ use common_telemetry::tracing::Span; use common_telemetry::tracing_context::TracingContext; use datafusion::execution::context::TaskContext; use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion::physical_plan::{ExecutionMode, PlanProperties}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortExpr}; use datatypes::schema::SchemaRef; use futures::{Stream, StreamExt}; use snafu::OptionExt; @@ -41,6 +42,7 @@ pub struct StreamScanAdapter { schema: SchemaRef, output_ordering: Option>, metric: ExecutionPlanMetricsSet, + properties: PlanProperties, } impl Debug for StreamScanAdapter { @@ -55,12 +57,17 @@ impl Debug for StreamScanAdapter { impl StreamScanAdapter { pub fn new(stream: SendableRecordBatchStream) -> Self { let schema = stream.schema(); - + let properties = PlanProperties::new( + EquivalenceProperties::new(schema.arrow_schema().clone()), + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ); Self { stream: Mutex::new(Some(stream)), schema, output_ordering: None, metric: ExecutionPlanMetricsSet::new(), + properties, } } @@ -79,12 +86,8 @@ impl PhysicalPlan for StreamScanAdapter { self.schema.clone() } - fn output_partitioning(&self) -> Partitioning { - Partitioning::UnknownPartitioning(1) - } - - fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.output_ordering.as_deref() + fn properties(&self) -> &PlanProperties { + &self.properties } fn children(&self) -> Vec { @@ -198,8 +201,15 @@ mod test { let stream = recordbatches.as_stream(); let scan = StreamScanAdapter::new(stream); - - assert_eq!(scan.schema(), schema); + let actual: SchemaRef = Arc::new( + scan.properties + .eq_properties + .schema() + .clone() + .try_into() + .unwrap(), + ); + assert_eq!(actual, schema); let stream = 
scan.execute(0, ctx.task_ctx()).unwrap(); let recordbatches = util::collect(stream).await.unwrap(); diff --git a/src/table/src/table_reference.rs b/src/table/src/table_reference.rs index fb1f6bc63a..3181efddd7 100644 --- a/src/table/src/table_reference.rs +++ b/src/table/src/table_reference.rs @@ -50,7 +50,7 @@ impl<'a> Display for TableReference<'a> { } } -impl<'a> From> for DfTableReference<'a> { +impl<'a> From> for DfTableReference { fn from(val: TableReference<'a>) -> Self { DfTableReference::full(val.catalog, val.schema, val.table) } diff --git a/tests-integration/src/instance.rs b/tests-integration/src/instance.rs index 90c66e15e9..87a1ea476f 100644 --- a/tests-integration/src/instance.rs +++ b/tests-integration/src/instance.rs @@ -180,14 +180,14 @@ mod tests { let batches = common_recordbatch::util::collect_batches(s).await.unwrap(); let pretty_print = batches.pretty_print().unwrap(); let expected = "\ -+-------+---------------------+-------------+-----------+-----------+ -| host | ts | cpu | memory | disk_util | -+-------+---------------------+-------------+-----------+-----------+ -| 490 | 2013-12-31T16:00:00 | 0.1 | 1.0 | 9.9 | -| 550-A | 2022-12-31T16:00:00 | 1.0 | 100.0 | 9.9 | -| 550-W | 2023-12-31T16:00:00 | 10000.0 | 1000000.0 | 9.9 | -| MOSS | 2043-12-31T16:00:00 | 100000000.0 | 1.0e10 | 9.9 | -+-------+---------------------+-------------+-----------+-----------+"; ++-------+---------------------+-------------+---------------+-----------+ +| host | ts | cpu | memory | disk_util | ++-------+---------------------+-------------+---------------+-----------+ +| 490 | 2013-12-31T16:00:00 | 0.1 | 1.0 | 9.9 | +| 550-A | 2022-12-31T16:00:00 | 1.0 | 100.0 | 9.9 | +| 550-W | 2023-12-31T16:00:00 | 10000.0 | 1000000.0 | 9.9 | +| MOSS | 2043-12-31T16:00:00 | 100000000.0 | 10000000000.0 | 9.9 | ++-------+---------------------+-------------+---------------+-----------+"; assert_eq!(pretty_print, expected); } diff --git a/tests-integration/src/tests/instance_test.rs b/tests-integration/src/tests/instance_test.rs index 622bb3d861..dd6fbccb1a 100644 --- a/tests-integration/src/tests/instance_test.rs +++ b/tests-integration/src/tests/instance_test.rs @@ -411,11 +411,9 @@ async fn test_execute_insert_by_select(instance: Arc) { assert!(matches!( try_execute_sql(&instance, "insert into demo2(ts) select memory from demo1") .await - .unwrap_err(), - Error::TableOperation { - source: OperatorError::PlanStatement { .. }, - .. 
- } + .unwrap() + .data, + OutputData::AffectedRows(2), )); let output = execute_sql(&instance, "insert into demo2 select * from demo1") @@ -427,12 +425,14 @@ async fn test_execute_insert_by_select(instance: Arc) { .await .data; let expected = "\ -+-------+------+--------+---------------------+ -| host | cpu | memory | ts | -+-------+------+--------+---------------------+ -| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 | -| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 | -+-------+------+--------+---------------------+"; ++-------+------+--------+-------------------------+ +| host | cpu | memory | ts | ++-------+------+--------+-------------------------+ +| | | | 1970-01-01T00:00:00.333 | +| | | | 1970-01-01T00:00:01.024 | +| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 | +| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 | ++-------+------+--------+-------------------------+"; check_output_stream(output, expected).await; } @@ -727,7 +727,7 @@ async fn test_execute_query_external_table_parquet(instance: Arc) | 4 | 4.4 | | false | | | 1970-01-01T00:00:00 | | 5 | 6.6 | | false | 1990-01-01 | 1990-01-01T03:00:00 | 1970-01-01T00:00:00 | | 4 | 4000000.0 | | false | | | 1970-01-01T00:00:00 | -| 4 | 4.0e-6 | | false | | | 1970-01-01T00:00:00 | +| 4 | 4e-6 | | false | | | 1970-01-01T00:00:00 | +-------+-----------+----------+--------+------------+---------------------+---------------------+"; check_output_stream(output, expect).await; } @@ -1054,22 +1054,22 @@ async fn test_execute_query_external_table_json_with_schema(instance: Arc = DateTime::from_naive_utc_and_offset( - NaiveDateTime::from_timestamp_opt(60, i).unwrap(), + chrono::DateTime::from_timestamp(60, i).unwrap().naive_utc(), Utc, ); let d = NaiveDate::from_yo_opt(2015, 100).unwrap(); @@ -179,7 +179,9 @@ pub async fn test_mysql_crud(store_type: StorageType) { let expected_d = NaiveDate::from_yo_opt(2015, 100).unwrap(); assert_eq!(expected_d, d); let expected_dt: DateTime = DateTime::from_naive_utc_and_offset( - NaiveDateTime::from_timestamp_opt(60, i as u32).unwrap(), + chrono::DateTime::from_timestamp(60, i as u32) + .unwrap() + .naive_utc(), Utc, ); assert_eq!( @@ -360,7 +362,7 @@ pub async fn test_postgres_crud(store_type: StorageType) { for i in 0..10 { let d = NaiveDate::from_yo_opt(2015, 100).unwrap(); - let dt = d.and_hms_opt(0, 0, 0).unwrap().timestamp_millis(); + let dt = d.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp_millis(); sqlx::query("insert into demo values($1, $2, $3, $4)") .bind(i) diff --git a/tests/cases/distributed/explain/join_10_tables.result b/tests/cases/distributed/explain/join_10_tables.result index 0fe8bebfb6..8b40f21fc6 100644 --- a/tests/cases/distributed/explain/join_10_tables.result +++ b/tests/cases/distributed/explain/join_10_tables.result @@ -75,6 +75,7 @@ limit 1; +-+-+ | logical_plan_| Limit: skip=0, fetch=1_| |_|_Sort: t_1.ts DESC NULLS FIRST, fetch=1_| +|_|_Projection: t_1.ts, t_1.vin, t_1.val, t_2.ts, t_2.vin, t_2.val, t_3.ts, t_3.vin, t_3.val, t_4.ts, t_4.vin, t_4.val, t_5.ts, t_5.vin, t_5.val, t_6.ts, t_6.vin, t_6.val, t_7.ts, t_7.vin, t_7.val, t_8.ts, t_8.vin, t_8.val, t_9.ts, t_9.vin, t_9.val, t_10.ts, t_10.vin, t_10.val | |_|_Inner Join: t_9.ts = t_10.ts, t_9.vin = t_10.vin_| |_|_Inner Join: t_8.ts = t_9.ts, t_8.vin = t_9.vin_| |_|_Inner Join: t_7.ts = t_8.ts, t_7.vin = t_8.vin_| diff --git a/tests/cases/distributed/explain/subqueries.result b/tests/cases/distributed/explain/subqueries.result index 5d0857b091..593e745e1d 100644 --- a/tests/cases/distributed/explain/subqueries.result +++ 
b/tests/cases/distributed/explain/subqueries.result @@ -13,9 +13,11 @@ EXPLAIN SELECT * FROM integers WHERE i IN ((SELECT i FROM integers)) ORDER BY i; | plan_type_| plan_| +-+-+ | logical_plan_| Sort: integers.i ASC NULLS LAST_| +|_|_Projection: integers.i, integers.j_| |_|_LeftSemi Join: integers.i = __correlated_sq_1.i_| |_|_MergeScan [is_placeholder=false]_| |_|_SubqueryAlias: __correlated_sq_1_| +|_|_Projection: integers.i_| |_|_MergeScan [is_placeholder=false]_| | physical_plan | SortPreservingMergeExec: [i@0 ASC NULLS LAST]_| |_|_SortExec: expr=[i@0 ASC NULLS LAST]_| @@ -43,6 +45,7 @@ EXPLAIN SELECT * FROM integers i1 WHERE EXISTS(SELECT i FROM integers WHERE i=i1 | plan_type_| plan_| +-+-+ | logical_plan_| Sort: i1.i ASC NULLS LAST_| +|_|_Projection: i1.i, i1.j_| |_|_LeftSemi Join: i1.i = __correlated_sq_1.i_| |_|_SubqueryAlias: i1_| |_|_MergeScan [is_placeholder=false]_| @@ -86,24 +89,24 @@ order by t.i desc; +-+-+ | logical_plan_| Sort: t.i DESC NULLS FIRST_| |_|_SubqueryAlias: t_| -|_|_Inner Join:_| -|_|_Projection:_| +|_|_CrossJoin:_| +|_|_Filter: integers.i IS NOT NULL_| +|_|_Projection: integers.i_| |_|_MergeScan [is_placeholder=false]_| -|_|_Filter: other.i IS NOT NULL_| -|_|_Projection: other.i_| +|_|_Projection:_| |_|_MergeScan [is_placeholder=false]_| | physical_plan | SortPreservingMergeExec: [i@0 DESC]_| |_|_SortExec: expr=[i@0 DESC]_| -|_|_NestedLoopJoinExec: join_type=Inner_| -|_|_RepartitionExec: partitioning=REDACTED -|_|_ProjectionExec: expr=[]_| -|_|_MergeScanExec: REDACTED +|_|_CrossJoinExec_| |_|_CoalescePartitionsExec_| |_|_CoalesceBatchesExec: target_batch_size=8192_| |_|_FilterExec: i@0 IS NOT NULL_| |_|_RepartitionExec: partitioning=REDACTED |_|_ProjectionExec: expr=[i@0 as i]_| |_|_MergeScanExec: REDACTED +|_|_RepartitionExec: partitioning=REDACTED +|_|_ProjectionExec: expr=[]_| +|_|_MergeScanExec: REDACTED |_|_| +-+-+ @@ -118,7 +121,8 @@ EXPLAIN INSERT INTO other SELECT i, 2 FROM integers WHERE i=(SELECT MAX(i) FROM | | Projection: integers.i | | | MergeScan [is_placeholder=false] | | | SubqueryAlias: __scalar_sq_1 | -| | MergeScan [is_placeholder=false] | +| | Projection: MAX(integers.i) | +| | MergeScan [is_placeholder=false] | +--------------+-------------------------------------------------------------------+ drop table other; diff --git a/tests/cases/distributed/optimizer/filter_push_down.result b/tests/cases/distributed/optimizer/filter_push_down.result index 67370b04e4..fa4dab8a9a 100644 --- a/tests/cases/distributed/optimizer/filter_push_down.result +++ b/tests/cases/distributed/optimizer/filter_push_down.result @@ -225,19 +225,47 @@ SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHER | 3 | +---+ --- TODO(LFC): Somehow the following SQL does not order by column 1 under new DataFusion occasionally. Should further investigate it. Comment it out temporarily. 
--- expected: --- +---+---+--------------+ --- | a | b | ROW_NUMBER() | --- +---+---+--------------+ --- | 1 | 1 | 1 | --- | 2 | 2 | 5 | --- | 3 | 3 | 9 | --- +---+---+--------------+ --- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; -SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1; +SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; -Error: 3001(EngineExecuteQuery), Invalid argument error: must either specify a row count or at least one column ++---+---+--------------------------------------------------------------------------------------------------------------------+ +| a | b | ROW_NUMBER() ORDER BY [i1.i ASC NULLS LAST, i2.i ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | ++---+---+--------------------------------------------------------------------------------------------------------------------+ +| 1 | 1 | 1 | +| 2 | 2 | 5 | +| 3 | 3 | 9 | ++---+---+--------------------------------------------------------------------------------------------------------------------+ + +-- The "0=1" will be evaluated as a constant expression that is always false, and will be optimized away in the query +-- engine. In the final plan, there's no filter node. We explain it to ensure that. +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (peers.*) REDACTED +EXPLAIN SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1; + ++-+-+ +| plan_type_| plan_| ++-+-+ +| logical_plan_| Sort: a1.cond ASC NULLS LAST_| +|_|_SubqueryAlias: a1_| +|_|_Projection: Boolean(false) AS cond_| +|_|_CrossJoin:_| +|_|_SubqueryAlias: i1_| +|_|_Projection:_| +|_|_MergeScan [is_placeholder=false]_| +|_|_SubqueryAlias: i2_| +|_|_Projection:_| +|_|_MergeScan [is_placeholder=false]_| +| physical_plan | CoalescePartitionsExec_| +|_|_ProjectionExec: expr=[false as cond]_| +|_|_CrossJoinExec_| +|_|_ProjectionExec: expr=[]_| +|_|_MergeScanExec: REDACTED +|_|_RepartitionExec: partitioning=REDACTED +|_|_ProjectionExec: expr=[]_| +|_|_MergeScanExec: REDACTED +|_|_| ++-+-+ SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1; diff --git a/tests/cases/distributed/optimizer/filter_push_down.sql b/tests/cases/distributed/optimizer/filter_push_down.sql index 4870ddcb1c..c34551f25e 100644 --- a/tests/cases/distributed/optimizer/filter_push_down.sql +++ b/tests/cases/distributed/optimizer/filter_push_down.sql @@ -44,18 +44,15 @@ SELECT * FROM (SELECT DISTINCT i1.i AS a, i2.i AS b FROM integers i1, integers i SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHERE i=3; --- TODO(LFC): Somehow the following SQL does not order by column 1 under new DataFusion occasionally. Should further investigate it. Comment it out temporarily. 
--- expected: --- +---+---+--------------+ --- | a | b | ROW_NUMBER() | --- +---+---+--------------+ --- | 1 | 1 | 1 | --- | 2 | 2 | 5 | --- | 3 | 3 | 9 | --- +---+---+--------------+ --- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; +SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; -SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1; +-- The "0=1" will be evaluated as a constant expression that is always false, and will be optimized away in the query +-- engine. In the final plan, there's no filter node. We explain it to ensure that. +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (peers.*) REDACTED +EXPLAIN SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1; SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1; diff --git a/tests/cases/standalone/common/aggregate/distinct_order_by.result b/tests/cases/standalone/common/aggregate/distinct_order_by.result index 996048bed8..29028359e0 100644 --- a/tests/cases/standalone/common/aggregate/distinct_order_by.result +++ b/tests/cases/standalone/common/aggregate/distinct_order_by.result @@ -29,7 +29,7 @@ Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: For SELECT DI SELECT DISTINCT ON (1) i % 2, i FROM integers WHERE i<3 ORDER BY i; -Error: 3000(PlanQuery), Failed to plan SQL: This feature is not implemented: DISTINCT ON Exprs not supported +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: SELECT DISTINCT ON expressions must match initial ORDER BY expressions SELECT DISTINCT integers.i FROM integers ORDER BY i DESC; diff --git a/tests/cases/standalone/common/aggregate/sum.result b/tests/cases/standalone/common/aggregate/sum.result index 526fce1757..4c54f1bef8 100644 --- a/tests/cases/standalone/common/aggregate/sum.result +++ b/tests/cases/standalone/common/aggregate/sum.result @@ -24,11 +24,7 @@ SELECT SUM(-1) FROM numbers; SELECT SUM(-1) FROM numbers WHERE number=-1; -+----------------+ -| SUM(Int64(-1)) | -+----------------+ -| | -+----------------+ +Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value -1 to type UInt32 SELECT SUM(-1) FROM numbers WHERE number>10000 limit 1000; @@ -64,11 +60,11 @@ Affected Rows: 4 SELECT sum(n) from doubles; -+----------------------+ -| SUM(doubles.n) | -+----------------------+ -| 9.007199254740992e15 | -+----------------------+ ++--------------------+ +| SUM(doubles.n) | ++--------------------+ +| 9007199254740992.0 | ++--------------------+ DROP TABLE bigints; diff --git a/tests/cases/standalone/common/cte/cte.result b/tests/cases/standalone/common/cte/cte.result index 2f143c964a..3982a3a857 100644 --- a/tests/cases/standalone/common/cte/cte.result +++ b/tests/cases/standalone/common/cte/cte.result @@ -57,7 +57,7 @@ with cte1 as (select i as j from a), cte2 as (select ref.j as k from cte1 as ref with cte1 as (select 42), cte1 as (select 42) select * FROM cte1; -Error: 3000(PlanQuery), Failed to plan SQL: sql parser error: WITH query name "cte1" specified more than once +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: WITH query name "cte1" specified more than 
once -- reference to CTE before its actually defined, it's not supported by datafusion with cte3 as (select ref2.j as i from cte1 as ref2), cte1 as (Select i as j from a), cte2 as (select ref.j+1 as k from cte1 as ref) select * from cte2 union all select * FROM cte3; @@ -109,7 +109,7 @@ WITH RECURSIVE cte(d) AS ( ) SELECT max(d) FROM cte; -Error: 3000(PlanQuery), Failed to plan SQL: This feature is not implemented: Recursive CTEs are not supported +Error: 3000(PlanQuery), Failed to plan SQL: This feature is not implemented: Recursive CTE is not implemented -- Nested aliases is not supported in datafusion with cte (a) as ( diff --git a/tests/cases/standalone/common/cte/cte_in_cte.result b/tests/cases/standalone/common/cte/cte_in_cte.result index b2380be7fc..3c8d6010c8 100644 --- a/tests/cases/standalone/common/cte/cte_in_cte.result +++ b/tests/cases/standalone/common/cte/cte_in_cte.result @@ -40,7 +40,7 @@ with cte1 as (with b as (Select i as j from a) select j from b), cte2 as (with c with cte1 as (select 42), cte1 as (select 42) select * FROM cte1; -Error: 3000(PlanQuery), Failed to plan SQL: sql parser error: WITH query name "cte1" specified more than once +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: WITH query name "cte1" specified more than once with cte1 as (Select i as j from a) select * from (with cte2 as (select max(j) as j from cte1) select * from cte2) f; @@ -64,7 +64,7 @@ with cte1 as (Select i as j from a) select * from cte1 where j = (with cte2 as ( -- this feature is not implemented in datafusion with cte as (Select i as j from a) select * from cte where j = (with cte as (select max(j) as j from cte) select j from cte); -Error: 3000(PlanQuery), Failed to plan SQL: sql parser error: WITH query name "cte" specified more than once +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: WITH query name "cte" specified more than once -- self-refer to non-existent cte- with cte as (select * from cte) select * from cte; diff --git a/tests/cases/standalone/common/function/arithmetic.result b/tests/cases/standalone/common/function/arithmetic.result index 563053fbce..20c612d56a 100644 --- a/tests/cases/standalone/common/function/arithmetic.result +++ b/tests/cases/standalone/common/function/arithmetic.result @@ -24,7 +24,7 @@ SELECT MOD(18.0, 4.0); SELECT MOD(18, 0); -Error: 3001(EngineExecuteQuery), DataFusion error: Divide by zero error +Error: 3001(EngineExecuteQuery), Divide by zero error SELECT POW (2, 5); @@ -76,5 +76,5 @@ SELECT CLAMP(0.5, 0, 1); SELECT CLAMP(10, 1, 0); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: The second arg should be less than or equal to the third arg, have: ConstantVector([Int64(1); 1]), ConstantVector([Int64(0); 1]) +Error: 3001(EngineExecuteQuery), Invalid function args: The second arg should be less than or equal to the third arg, have: ConstantVector([Int64(1); 1]), ConstantVector([Int64(0); 1]) diff --git a/tests/cases/standalone/common/function/date.result b/tests/cases/standalone/common/function/date.result index a94cff3cd3..27b9a981ae 100644 --- a/tests/cases/standalone/common/function/date.result +++ b/tests/cases/standalone/common/function/date.result @@ -1,19 +1,19 @@ --- date_add --- SELECT date_add('2023-12-06 07:39:46.222'::TIMESTAMP_MS, INTERVAL '5 day'); -+----------------------------------------------------------------------------------------+ -| date_add(Utf8("2023-12-06 07:39:46.222"),IntervalMonthDayNano("92233720368547758080")) | 
-+----------------------------------------------------------------------------------------+ -| 2023-12-11T07:39:46.222 | -+----------------------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------------------------------------------------------------+ +| date_add(arrow_cast(Utf8("2023-12-06 07:39:46.222"),Utf8("Timestamp(Millisecond, None)")),IntervalMonthDayNano("92233720368547758080")) | ++-----------------------------------------------------------------------------------------------------------------------------------------+ +| 2023-12-11T07:39:46.222 | ++-----------------------------------------------------------------------------------------------------------------------------------------+ SELECT date_add('2023-12-06 07:39:46.222'::TIMESTAMP_MS, '5 day'); -+---------------------------------------------------------+ -| date_add(Utf8("2023-12-06 07:39:46.222"),Utf8("5 day")) | -+---------------------------------------------------------+ -| 2023-12-11T07:39:46.222 | -+---------------------------------------------------------+ ++----------------------------------------------------------------------------------------------------------+ +| date_add(arrow_cast(Utf8("2023-12-06 07:39:46.222"),Utf8("Timestamp(Millisecond, None)")),Utf8("5 day")) | ++----------------------------------------------------------------------------------------------------------+ +| 2023-12-11T07:39:46.222 | ++----------------------------------------------------------------------------------------------------------+ SELECT date_add('2023-12-06'::DATE, INTERVAL '3 month 5 day'); @@ -34,19 +34,19 @@ SELECT date_add('2023-12-06'::DATE, '3 month 5 day'); --- date_sub --- SELECT date_sub('2023-12-06 07:39:46.222'::TIMESTAMP_MS, INTERVAL '5 day'); -+----------------------------------------------------------------------------------------+ -| date_sub(Utf8("2023-12-06 07:39:46.222"),IntervalMonthDayNano("92233720368547758080")) | -+----------------------------------------------------------------------------------------+ -| 2023-12-01T07:39:46.222 | -+----------------------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------------------------------------------------------------+ +| date_sub(arrow_cast(Utf8("2023-12-06 07:39:46.222"),Utf8("Timestamp(Millisecond, None)")),IntervalMonthDayNano("92233720368547758080")) | ++-----------------------------------------------------------------------------------------------------------------------------------------+ +| 2023-12-01T07:39:46.222 | ++-----------------------------------------------------------------------------------------------------------------------------------------+ SELECT date_sub('2023-12-06 07:39:46.222'::TIMESTAMP_MS, '5 day'); -+---------------------------------------------------------+ -| date_sub(Utf8("2023-12-06 07:39:46.222"),Utf8("5 day")) | -+---------------------------------------------------------+ -| 2023-12-01T07:39:46.222 | -+---------------------------------------------------------+ ++----------------------------------------------------------------------------------------------------------+ +| date_sub(arrow_cast(Utf8("2023-12-06 07:39:46.222"),Utf8("Timestamp(Millisecond, None)")),Utf8("5 day")) | ++----------------------------------------------------------------------------------------------------------+ +| 2023-12-01T07:39:46.222 | 
++----------------------------------------------------------------------------------------------------------+ SELECT date_sub('2023-12-06'::DATE, INTERVAL '3 month 5 day'); @@ -67,19 +67,19 @@ SELECT date_sub('2023-12-06'::DATE, '3 month 5 day'); --- date_format --- SELECT date_format('2023-12-06 07:39:46.222'::TIMESTAMP_MS, '%Y-%m-%d %H:%M:%S:%3f'); -+----------------------------------------------------------------------------+ -| date_format(Utf8("2023-12-06 07:39:46.222"),Utf8("%Y-%m-%d %H:%M:%S:%3f")) | -+----------------------------------------------------------------------------+ -| 2023-12-06 07:39:46:222 | -+----------------------------------------------------------------------------+ ++-----------------------------------------------------------------------------------------------------------------------------+ +| date_format(arrow_cast(Utf8("2023-12-06 07:39:46.222"),Utf8("Timestamp(Millisecond, None)")),Utf8("%Y-%m-%d %H:%M:%S:%3f")) | ++-----------------------------------------------------------------------------------------------------------------------------+ +| 2023-12-06 07:39:46:222 | ++-----------------------------------------------------------------------------------------------------------------------------+ SELECT date_format('2023-12-06 07:39:46.222'::TIMESTAMP_S, '%Y-%m-%d %H:%M:%S:%3f'); -+----------------------------------------------------------------------------+ -| date_format(Utf8("2023-12-06 07:39:46.222"),Utf8("%Y-%m-%d %H:%M:%S:%3f")) | -+----------------------------------------------------------------------------+ -| 2023-12-06 07:39:46:000 | -+----------------------------------------------------------------------------+ ++------------------------------------------------------------------------------------------------------------------------+ +| date_format(arrow_cast(Utf8("2023-12-06 07:39:46.222"),Utf8("Timestamp(Second, None)")),Utf8("%Y-%m-%d %H:%M:%S:%3f")) | ++------------------------------------------------------------------------------------------------------------------------+ +| 2023-12-06 07:39:46:000 | ++------------------------------------------------------------------------------------------------------------------------+ --- datetime not supported yet --- SELECT date_format('2023-12-06 07:39:46.222'::DATETIME, '%Y-%m-%d %H:%M:%S:%3f'); diff --git a/tests/cases/standalone/common/function/time.result b/tests/cases/standalone/common/function/time.result index 25f82f2a48..83589ced35 100644 --- a/tests/cases/standalone/common/function/time.result +++ b/tests/cases/standalone/common/function/time.result @@ -30,11 +30,11 @@ select to_timezone('2022-09-20T14:16:43.012345+08:00', 'Europe/Berlin'); select to_timezone('2022-09-20T14:16:43.012345+08:00'::Timestamp, 'Europe/Berlin'); -+-----------------------------------------------------------------------------+ -| to_timezone(Utf8("2022-09-20T14:16:43.012345+08:00"),Utf8("Europe/Berlin")) | -+-----------------------------------------------------------------------------+ -| 2022-09-20 08:16:43.012 | -+-----------------------------------------------------------------------------+ ++------------------------------------------------------------------------------------------------------------------------------+ +| to_timezone(arrow_cast(Utf8("2022-09-20T14:16:43.012345+08:00"),Utf8("Timestamp(Millisecond, None)")),Utf8("Europe/Berlin")) | ++------------------------------------------------------------------------------------------------------------------------------+ +| 2022-09-20 08:16:43.012 | 
++------------------------------------------------------------------------------------------------------------------------------+ select to_timezone('2024-03-29T14:16:43.012345Z', 'Asia/Shanghai'); @@ -46,11 +46,11 @@ select to_timezone('2024-03-29T14:16:43.012345Z', 'Asia/Shanghai'); select to_timezone('2024-03-29T14:16:43.012345Z'::Timestamp, 'Asia/Shanghai'); -+------------------------------------------------------------------------+ -| to_timezone(Utf8("2024-03-29T14:16:43.012345Z"),Utf8("Asia/Shanghai")) | -+------------------------------------------------------------------------+ -| 2024-03-29 22:16:43.012 | -+------------------------------------------------------------------------+ ++-------------------------------------------------------------------------------------------------------------------------+ +| to_timezone(arrow_cast(Utf8("2024-03-29T14:16:43.012345Z"),Utf8("Timestamp(Millisecond, None)")),Utf8("Asia/Shanghai")) | ++-------------------------------------------------------------------------------------------------------------------------+ +| 2024-03-29 22:16:43.012 | ++-------------------------------------------------------------------------------------------------------------------------+ select to_timezone(1709992225, 'Asia/Shanghai'); @@ -62,9 +62,9 @@ select to_timezone(1709992225, 'Asia/Shanghai'); select to_timezone(1711508510000::INT64, 'Asia/Shanghai'); -+---------------------------------------------------------+ -| to_timezone(Int64(1711508510000),Utf8("Asia/Shanghai")) | -+---------------------------------------------------------+ -| 2024-03-27 11:01:50 | -+---------------------------------------------------------+ ++-----------------------------------------------------------------------------------+ +| to_timezone(arrow_cast(Int64(1711508510000),Utf8("Int64")),Utf8("Asia/Shanghai")) | ++-----------------------------------------------------------------------------------+ +| 2024-03-27 11:01:50 | ++-----------------------------------------------------------------------------------+ diff --git a/tests/cases/standalone/common/insert/insert_select.result b/tests/cases/standalone/common/insert/insert_select.result index 5bb2bb97fb..c59d1b3233 100644 --- a/tests/cases/standalone/common/insert/insert_select.result +++ b/tests/cases/standalone/common/insert/insert_select.result @@ -20,7 +20,7 @@ Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Column count insert into demo2(ts) select memory from demo1; -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Cannot automatically convert Float64 to Timestamp(Millisecond, None) +Affected Rows: 2 insert into demo2 select * from demo1; @@ -28,12 +28,14 @@ Affected Rows: 2 select * from demo2 order by ts; -+-------+------+--------+---------------------+ -| host | cpu | memory | ts | -+-------+------+--------+---------------------+ -| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 | -| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 | -+-------+------+--------+---------------------+ ++-------+------+--------+-------------------------+ +| host | cpu | memory | ts | ++-------+------+--------+-------------------------+ +| | | | 1970-01-01T00:00:00.333 | +| | | | 1970-01-01T00:00:01.024 | +| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 | +| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 | ++-------+------+--------+-------------------------+ drop table demo1; diff --git a/tests/cases/standalone/common/order/limit.result b/tests/cases/standalone/common/order/limit.result index 3648460758..dd9dec772f 100644 --- 
a/tests/cases/standalone/common/order/limit.result +++ b/tests/cases/standalone/common/order/limit.result @@ -16,27 +16,31 @@ SELECT a FROM test LIMIT 1; SELECT a FROM test LIMIT 1.25; -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT a FROM test LIMIT 2-1; -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative ++-------------------------+ +| a | ++-------------------------+ +| 1970-01-01T00:00:00.011 | ++-------------------------+ SELECT a FROM test LIMIT a; -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT a FROM test LIMIT a+1; -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT a FROM test LIMIT SUM(42); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT a FROM test LIMIT row_number() OVER (); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause CREATE TABLE test2 (a STRING, ts TIMESTAMP TIME INDEX); @@ -56,7 +60,7 @@ SELECT * FROM test2 LIMIT 3; select 1 limit date '1992-01-01'; -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause CREATE TABLE integers(i TIMESTAMP TIME INDEX); @@ -89,7 +93,7 @@ SELECT * FROM integers LIMIT 4; SELECT * FROM integers as int LIMIT (SELECT MIN(integers.i) FROM integers); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT * FROM integers as int OFFSET (SELECT MIN(integers.i) FROM integers); @@ -101,23 +105,23 @@ Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected ex SELECT * FROM integers as int LIMIT (SELECT max(integers.i) FROM integers where i > 5); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT * FROM integers as int LIMIT (SELECT max(integers.i) FROM integers where i > 5); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT * FROM integers as int LIMIT (SELECT NULL); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause SELECT * FROM integers as int LIMIT (SELECT -1); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause 
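The LIMIT failures above all shift from "LIMIT must not be negative" to "Unexpected expression in LIMIT clause": the planner in the upgraded DataFusion appears to accept only LIMIT operands it can reduce to an integer literal (which is why "LIMIT 2-1" now succeeds), and reports the new message for everything else. Below is a minimal, purely illustrative sketch of where that distinction shows up at the sqlparser level; it is not code from this patch, and it assumes Query::limit is an Option<Expr> as in the sqlparser release this upgrade targets.

use sqlparser::ast::{Expr, Statement, Value};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let queries = [
        "SELECT a FROM test LIMIT 1",
        "SELECT a FROM test LIMIT 2-1",
        "SELECT a FROM test LIMIT a+1",
    ];
    for sql in queries {
        let stmts = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
        if let Statement::Query(query) = &stmts[0] {
            match &query.limit {
                // A plain numeric literal is always accepted by the planner.
                Some(Expr::Value(Value::Number(n, _))) => println!("{sql} -> literal limit {n}"),
                // Column references, subqueries, window calls, etc. are what now
                // produce "Unexpected expression in LIMIT clause"; constant
                // arithmetic such as 2-1 appears to be folded before that check.
                Some(other) => println!("{sql} -> expression limit {other}"),
                None => println!("{sql} -> no limit"),
            }
        }
    }
}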
SELECT * FROM integers as int LIMIT (SELECT 'ab'); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: LIMIT must not be negative +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Unexpected expression in LIMIT clause DROP TABLE integers; diff --git a/tests/cases/standalone/common/order/order_by.result b/tests/cases/standalone/common/order/order_by.result index 36cb023e3d..2e8014c6a2 100644 --- a/tests/cases/standalone/common/order/order_by.result +++ b/tests/cases/standalone/common/order/order_by.result @@ -210,17 +210,11 @@ SELECT a-10 AS k FROM test UNION SELECT a-10 AS l FROM test ORDER BY 1-k; +---+ -- Not compatible with duckdb, give an error in greptimedb --- TODO(LFC): Failed to meet the expected error: --- expected: --- Error: 3000(PlanQuery), Schema error: No field named 'a'. Valid fields are 'k'. SELECT a-10 AS k FROM test UNION SELECT a-10 AS l FROM test ORDER BY a-10; Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: For SELECT DISTINCT, ORDER BY expressions a must appear in select list -- Not compatible with duckdb, give an error in greptimedb --- TODO(LFC): Failed to meet the expected error: --- expected: --- Error: 3000(PlanQuery), Schema error: No field named 'a'. Valid fields are 'k'. SELECT a-10 AS k FROM test UNION SELECT a-11 AS l FROM test ORDER BY a-11; Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: For SELECT DISTINCT, ORDER BY expressions a must appear in select list diff --git a/tests/cases/standalone/common/order/order_by.sql b/tests/cases/standalone/common/order/order_by.sql index d1410bbd26..38ec572978 100644 --- a/tests/cases/standalone/common/order/order_by.sql +++ b/tests/cases/standalone/common/order/order_by.sql @@ -50,15 +50,9 @@ SELECT a-10 AS k FROM test UNION SELECT a-10 AS l FROM test ORDER BY l; SELECT a-10 AS k FROM test UNION SELECT a-10 AS l FROM test ORDER BY 1-k; -- Not compatible with duckdb, give an error in greptimedb --- TODO(LFC): Failed to meet the expected error: --- expected: --- Error: 3000(PlanQuery), Schema error: No field named 'a'. Valid fields are 'k'. SELECT a-10 AS k FROM test UNION SELECT a-10 AS l FROM test ORDER BY a-10; -- Not compatible with duckdb, give an error in greptimedb --- TODO(LFC): Failed to meet the expected error: --- expected: --- Error: 3000(PlanQuery), Schema error: No field named 'a'. Valid fields are 'k'. SELECT a-10 AS k FROM test UNION SELECT a-11 AS l FROM test ORDER BY a-11; DROP TABLE test; diff --git a/tests/cases/standalone/common/order/order_by_exceptions.result b/tests/cases/standalone/common/order/order_by_exceptions.result index 9edcddfc88..851eaacb3e 100644 --- a/tests/cases/standalone/common/order/order_by_exceptions.result +++ b/tests/cases/standalone/common/order/order_by_exceptions.result @@ -10,10 +10,15 @@ SELECT a FROM test ORDER BY 2; Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Order by column out of bounds, specified: 2, max: 1 --- Not work in greptimedb SELECT a FROM test ORDER BY 'hello', a; -Error: 3001(EngineExecuteQuery), Error during planning: Sort operation is not applicable to scalar value hello ++----+ +| a | ++----+ +| 11 | +| 12 | +| 13 | ++----+ -- Ambiguous reference in union alias, give and error in duckdb, but works in greptimedb SELECT a AS k, b FROM test UNION SELECT a, b AS k FROM test ORDER BY k; @@ -41,9 +46,6 @@ SELECT a % 2, b FROM test UNION SELECT b, a % 2 AS k ORDER BY a % 2; Error: 3000(PlanQuery), Failed to plan SQL: No field named b. 
-- Works duckdb, but not work in greptimedb --- TODO(LFC): Failed to meet the expected error: --- expected: --- Error: 3000(PlanQuery), Schema error: No field named 'a'. Valid fields are 'test.a % Int64(2)', 'b'. SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY a % 2; Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: For SELECT DISTINCT, ORDER BY expressions a must appear in select list @@ -52,9 +54,40 @@ SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY 3; Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Order by column out of bounds, specified: 3, max: 2 -SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY -1; +-- "order by -1" is generally an undefined behavior. +-- It's not supported in PostgreSQL 16, error "ORDER BY position -1 is not in select list". +-- But in Mysql 8, it can be executed, just the actual order is ignored. +-- In DataFusion, it behaves like Mysql 8. The "sort" plan node will be eliminated by the physical optimizer +-- "EnforceSorting" because it's sort key is parsed as a constant "-1". +-- We check the "explain" of the "order by -1" query to ensure that. +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE (partitioning.*) REDACTED +EXPLAIN SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY -1; -Error: 3001(EngineExecuteQuery), Error during planning: Sort operation is not applicable to scalar value -1 ++---------------+-----------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: Int64(-1) ASC NULLS LAST | +| | Aggregate: groupBy=[[test.a % Int64(2), test.b]], aggr=[[]] | +| | Union | +| | Projection: CAST(test.a AS Int64) % Int64(2) AS test.a % Int64(2), test.b | +| | MergeScan [is_placeholder=false] | +| | Projection: CAST(test.a AS Int64) % Int64(2) AS test.a % Int64(2), test.b | +| | MergeScan [is_placeholder=false] | +| physical_plan | CoalescePartitionsExec | +| | AggregateExec: mode=FinalPartitioned, gby=[test.a % Int64(2)@0 as test.a % Int64(2), b@1 as b], aggr=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: REDACTED +| | AggregateExec: mode=Partial, gby=[test.a % Int64(2)@0 as test.a % Int64(2), b@1 as b], aggr=[] | +| | UnionExec | +| | ProjectionExec: expr=[CAST(a@0 AS Int64) % 2 as test.a % Int64(2), b@1 as b] | +| | RepartitionExec: REDACTED +| | MergeScanExec: REDACTED +| | ProjectionExec: expr=[CAST(a@0 AS Int64) % 2 as test.a % Int64(2), b@1 as b] | +| | RepartitionExec: REDACTED +| | MergeScanExec: REDACTED +| | | ++---------------+-----------------------------------------------------------------------------------------------------------+ SELECT a % 2, b FROM test UNION SELECT a % 2 AS k FROM test ORDER BY -1; diff --git a/tests/cases/standalone/common/order/order_by_exceptions.sql b/tests/cases/standalone/common/order/order_by_exceptions.sql index 3b12a0e7e5..2024daa290 100644 --- a/tests/cases/standalone/common/order/order_by_exceptions.sql +++ b/tests/cases/standalone/common/order/order_by_exceptions.sql @@ -4,7 +4,6 @@ INSERT INTO test VALUES (11, 22, 1), (12, 21, 2), (13, 22, 3); SELECT a FROM test ORDER BY 2; --- Not work in greptimedb SELECT a FROM test ORDER BY 'hello', a; -- Ambiguous reference in union alias, give and error in duckdb, but works in greptimedb @@ -15,14 
+14,19 @@ SELECT a AS k, b FROM test UNION SELECT a AS k, b FROM test ORDER BY k; SELECT a % 2, b FROM test UNION SELECT b, a % 2 AS k ORDER BY a % 2; -- Works duckdb, but not work in greptimedb --- TODO(LFC): Failed to meet the expected error: --- expected: --- Error: 3000(PlanQuery), Schema error: No field named 'a'. Valid fields are 'test.a % Int64(2)', 'b'. SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY a % 2; SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY 3; -SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY -1; +-- "order by -1" is generally an undefined behavior. +-- It's not supported in PostgreSQL 16, error "ORDER BY position -1 is not in select list". +-- But in Mysql 8, it can be executed, just the actual order is ignored. +-- In DataFusion, it behaves like Mysql 8. The "sort" plan node will be eliminated by the physical optimizer +-- "EnforceSorting" because it's sort key is parsed as a constant "-1". +-- We check the "explain" of the "order by -1" query to ensure that. +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE (partitioning.*) REDACTED +EXPLAIN SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY -1; SELECT a % 2, b FROM test UNION SELECT a % 2 AS k FROM test ORDER BY -1; diff --git a/tests/cases/standalone/common/order/order_variable_size_payload.result b/tests/cases/standalone/common/order/order_variable_size_payload.result index 050026cea7..ba4386006f 100644 --- a/tests/cases/standalone/common/order/order_variable_size_payload.result +++ b/tests/cases/standalone/common/order/order_variable_size_payload.result @@ -129,14 +129,14 @@ Affected Rows: 4 SELECT * FROM tpch_q1_agg ORDER BY l_returnflag, l_linestatus; -+--------------+--------------+---------+------------------+-----------------+-----------------------+--------------------+--------------------+---------------------+-------------+-------------------------+ -| l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order | t | -+--------------+--------------+---------+------------------+-----------------+-----------------------+--------------------+--------------------+---------------------+-------------+-------------------------+ -| A | F | 3774200 | 5320753880.69 | 5054096266.6828 | 5256751331.449234 | 25.537587116854997 | 36002.12382901414 | 0.05014459706340077 | 147790 | 1970-01-01T00:00:00.003 | -| N | F | 95257 | 133737795.84 | 127132372.6512 | 132286291.229445 | 25.30066401062417 | 35521.32691633466 | 0.04939442231075697 | 3765 | 1970-01-01T00:00:00.004 | -| N | O | 7459297 | 1.05122700089e10 | 9986238338.3847 | 1.0385578376585466e10 | 25.545537671232875 | 36000.9246880137 | 0.05009595890410959 | 292000 | 1970-01-01T00:00:00.001 | -| R | F | 3785523 | 5337950526.47 | 5071818532.942 | 5274405503.049367 | 25.5259438574251 | 35994.029214030925 | 0.04998927856184382 | 148301 | 1970-01-01T00:00:00.002 | -+--------------+--------------+---------+------------------+-----------------+-----------------------+--------------------+--------------------+---------------------+-------------+-------------------------+ ++--------------+--------------+---------+----------------+-----------------+--------------------+--------------------+--------------------+---------------------+-------------+-------------------------+ +| l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order | 
t | ++--------------+--------------+---------+----------------+-----------------+--------------------+--------------------+--------------------+---------------------+-------------+-------------------------+ +| A | F | 3774200 | 5320753880.69 | 5054096266.6828 | 5256751331.449234 | 25.537587116854997 | 36002.12382901414 | 0.05014459706340077 | 147790 | 1970-01-01T00:00:00.003 | +| N | F | 95257 | 133737795.84 | 127132372.6512 | 132286291.229445 | 25.30066401062417 | 35521.32691633466 | 0.04939442231075697 | 3765 | 1970-01-01T00:00:00.004 | +| N | O | 7459297 | 10512270008.9 | 9986238338.3847 | 10385578376.585466 | 25.545537671232875 | 36000.9246880137 | 0.05009595890410959 | 292000 | 1970-01-01T00:00:00.001 | +| R | F | 3785523 | 5337950526.47 | 5071818532.942 | 5274405503.049367 | 25.5259438574251 | 35994.029214030925 | 0.04998927856184382 | 148301 | 1970-01-01T00:00:00.002 | ++--------------+--------------+---------+----------------+-----------------+--------------------+--------------------+--------------------+---------------------+-------------+-------------------------+ create table test5 (i int, s varchar, t TIMESTAMP TIME INDEX); diff --git a/tests/cases/standalone/common/range/by.result b/tests/cases/standalone/common/range/by.result index 1665eec32e..e4d9e243cd 100644 --- a/tests/cases/standalone/common/range/by.result +++ b/tests/cases/standalone/common/range/by.result @@ -52,12 +52,12 @@ SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' BY () ORDER BY ts; SELECT ts, length(host)::INT64 + 2, max(val) RANGE '5s' FROM host ALIGN '20s' BY (length(host)::INT64 + 2) ORDER BY ts; -+---------------------+----------------------------------------+------------------------+ -| ts | character_length(host.host) + Int64(2) | MAX(host.val) RANGE 5s | -+---------------------+----------------------------------------+------------------------+ -| 1970-01-01T00:00:00 | 7 | 3 | -| 1970-01-01T00:00:20 | 7 | 5 | -+---------------------+----------------------------------------+------------------------+ ++---------------------+------------------------------------------------------------------+------------------------+ +| ts | arrow_cast(character_length(host.host),Utf8("Int64")) + Int64(2) | MAX(host.val) RANGE 5s | ++---------------------+------------------------------------------------------------------+------------------------+ +| 1970-01-01T00:00:00 | 7 | 3 | +| 1970-01-01T00:00:20 | 7 | 5 | ++---------------------+------------------------------------------------------------------+------------------------+ -- Test error -- project non-aggregation key diff --git a/tests/cases/standalone/common/range/calculate.result b/tests/cases/standalone/common/range/calculate.result index a753a9268f..97d527968b 100644 --- a/tests/cases/standalone/common/range/calculate.result +++ b/tests/cases/standalone/common/range/calculate.result @@ -23,18 +23,18 @@ Affected Rows: 10 -- Test range expr calculate SELECT ts, host, covar(val::DOUBLE, val::DOUBLE) RANGE '20s' FROM host ALIGN '10s' ORDER BY host, ts; -+---------------------+-------+-----------------------------------------+ -| ts | host | COVARIANCE(host.val,host.val) RANGE 20s | -+---------------------+-------+-----------------------------------------+ -| 1969-12-31T23:59:50 | host1 | | -| 1970-01-01T00:00:00 | host1 | 0.5 | -| 1970-01-01T00:00:10 | host1 | 0.5 | -| 1970-01-01T00:00:20 | host1 | | -| 1969-12-31T23:59:50 | host2 | | -| 1970-01-01T00:00:00 | host2 | 0.5 | -| 1970-01-01T00:00:10 | host2 | 0.5 | -| 1970-01-01T00:00:20 | host2 | | 
-+---------------------+-------+-----------------------------------------+ ++---------------------+-------+------------------------------------+ +| ts | host | COVAR(host.val,host.val) RANGE 20s | ++---------------------+-------+------------------------------------+ +| 1969-12-31T23:59:50 | host1 | | +| 1970-01-01T00:00:00 | host1 | 0.5 | +| 1970-01-01T00:00:10 | host1 | 0.5 | +| 1970-01-01T00:00:20 | host1 | | +| 1969-12-31T23:59:50 | host2 | | +| 1970-01-01T00:00:00 | host2 | 0.5 | +| 1970-01-01T00:00:10 | host2 | 0.5 | +| 1970-01-01T00:00:20 | host2 | | ++---------------------+-------+------------------------------------+ SELECT ts, host, 2 * min(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts; @@ -139,18 +139,18 @@ SELECT ts, host, (min(val) + max(val)) RANGE '20s' + 1.0 FROM host ALIGN '10s' O SELECT ts, host, covar(ceil(val::DOUBLE), floor(val::DOUBLE)) RANGE '20s' FROM host ALIGN '10s' ORDER BY host, ts; -+---------------------+-------+------------------------------------------------------+ -| ts | host | COVARIANCE(ceil(host.val),floor(host.val)) RANGE 20s | -+---------------------+-------+------------------------------------------------------+ -| 1969-12-31T23:59:50 | host1 | | -| 1970-01-01T00:00:00 | host1 | 0.5 | -| 1970-01-01T00:00:10 | host1 | 0.5 | -| 1970-01-01T00:00:20 | host1 | | -| 1969-12-31T23:59:50 | host2 | | -| 1970-01-01T00:00:00 | host2 | 0.5 | -| 1970-01-01T00:00:10 | host2 | 0.5 | -| 1970-01-01T00:00:20 | host2 | | -+---------------------+-------+------------------------------------------------------+ ++---------------------+-------+-------------------------------------------------+ +| ts | host | COVAR(ceil(host.val),floor(host.val)) RANGE 20s | ++---------------------+-------+-------------------------------------------------+ +| 1969-12-31T23:59:50 | host1 | | +| 1970-01-01T00:00:00 | host1 | 0.5 | +| 1970-01-01T00:00:10 | host1 | 0.5 | +| 1970-01-01T00:00:20 | host1 | | +| 1969-12-31T23:59:50 | host2 | | +| 1970-01-01T00:00:00 | host2 | 0.5 | +| 1970-01-01T00:00:10 | host2 | 0.5 | +| 1970-01-01T00:00:20 | host2 | | ++---------------------+-------+-------------------------------------------------+ SELECT ts, host, floor(cos(ceil(sin(min(val) RANGE '5s')))) FROM host ALIGN '5s' ORDER BY host, ts; @@ -171,22 +171,22 @@ SELECT ts, host, floor(cos(ceil(sin(min(val) RANGE '5s')))) FROM host ALIGN '5s' SELECT ts, host, gcd(CAST(max(floor(val::DOUBLE)) RANGE '10s' FILL PREV as INT64) * 4, max(val * 4) RANGE '10s' FILL PREV) * length(host) + 1 FROM host ALIGN '5s' ORDER BY host, ts; -+---------------------+-------+------------------------------------------------------------------------------------------------------------------------------------------------+ -| ts | host | gcd(MAX(floor(host.val)) RANGE 10s FILL PREV * Int64(4),MAX(host.val * Int64(4)) RANGE 10s FILL PREV) * character_length(host.host) + Int64(1) | -+---------------------+-------+------------------------------------------------------------------------------------------------------------------------------------------------+ -| 1969-12-31T23:59:55 | host1 | 1 | -| 1970-01-01T00:00:00 | host1 | 1 | -| 1970-01-01T00:00:05 | host1 | 21 | -| 1970-01-01T00:00:10 | host1 | 21 | -| 1970-01-01T00:00:15 | host1 | 41 | -| 1970-01-01T00:00:20 | host1 | 41 | -| 1969-12-31T23:59:55 | host2 | 61 | -| 1970-01-01T00:00:00 | host2 | 61 | -| 1970-01-01T00:00:05 | host2 | 81 | -| 1970-01-01T00:00:10 | host2 | 81 | -| 1970-01-01T00:00:15 | host2 | 101 | -| 1970-01-01T00:00:20 | host2 | 101 | 
-+---------------------+-------+------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ts | host | gcd(arrow_cast(MAX(floor(host.val)) RANGE 10s FILL PREV,Utf8("Int64")) * Int64(4),MAX(host.val * Int64(4)) RANGE 10s FILL PREV) * character_length(host.host) + Int64(1) | ++---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 1969-12-31T23:59:55 | host1 | 1 | +| 1970-01-01T00:00:00 | host1 | 1 | +| 1970-01-01T00:00:05 | host1 | 21 | +| 1970-01-01T00:00:10 | host1 | 21 | +| 1970-01-01T00:00:15 | host1 | 41 | +| 1970-01-01T00:00:20 | host1 | 41 | +| 1969-12-31T23:59:55 | host2 | 61 | +| 1970-01-01T00:00:00 | host2 | 61 | +| 1970-01-01T00:00:05 | host2 | 81 | +| 1970-01-01T00:00:10 | host2 | 81 | +| 1970-01-01T00:00:15 | host2 | 101 | +| 1970-01-01T00:00:20 | host2 | 101 | ++---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE host; diff --git a/tests/cases/standalone/common/range/error.result b/tests/cases/standalone/common/range/error.result index a33558589b..01beca591f 100644 --- a/tests/cases/standalone/common/range/error.result +++ b/tests/cases/standalone/common/range/error.result @@ -59,8 +59,18 @@ Error: 3000(PlanQuery), DataFusion error: Error during planning: Missing argumen -- 2.3 type mismatch SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s'; -Error: 3001(EngineExecuteQuery), Internal error: Unsupported data type Int64 for function ceil. 
-This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker ++-------------------------------------------------+ +| COVAR(ceil(host.val),floor(host.val)) RANGE 20s | ++-------------------------------------------------+ +| | +| 0.5 | +| 0.5 | +| | +| | +| 0.5 | +| 0.5 | +| | ++-------------------------------------------------+ -- 2.4 nest query SELECT min(max(val) RANGE '20s') RANGE '20s' FROM host ALIGN '10s'; diff --git a/tests/cases/standalone/common/tql-explain-analyze/analyze.result b/tests/cases/standalone/common/tql-explain-analyze/analyze.result index 9d063adcdc..fcc3d50bbd 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/analyze.result +++ b/tests/cases/standalone/common/tql-explain-analyze/analyze.result @@ -19,7 +19,6 @@ TQL ANALYZE (0, 10, '5s') test; | plan_type_| plan_| +-+-+ | Plan with Metrics | PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j], REDACTED -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], REDACTED |_|_PromSeriesDivideExec: tags=["k"], REDACTED |_|_SortExec: expr=[k@2 ASC NULLS LAST], REDACTED @@ -40,7 +39,6 @@ TQL ANALYZE (0, 10, '1s', '2s') test; | plan_type_| plan_| +-+-+ | Plan with Metrics | PromInstantManipulateExec: range=[0..10000], lookback=[2000], interval=[1000], time index=[j], REDACTED -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], REDACTED |_|_PromSeriesDivideExec: tags=["k"], REDACTED |_|_SortExec: expr=[k@2 ASC NULLS LAST], REDACTED @@ -60,7 +58,6 @@ TQL ANALYZE ('1970-01-01T00:00:00'::timestamp, '1970-01-01T00:00:00'::timestamp | plan_type_| plan_| +-+-+ | Plan with Metrics | PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j], REDACTED -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], REDACTED |_|_PromSeriesDivideExec: tags=["k"], REDACTED |_|_SortExec: expr=[k@2 ASC NULLS LAST], REDACTED @@ -82,14 +79,12 @@ TQL ANALYZE VERBOSE (0, 10, '5s') test; | plan_type_| plan_| +-+-+ | Plan with Metrics_| PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j], REDACTED -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], REDACTED |_|_PromSeriesDivideExec: tags=["k"], REDACTED |_|_SortExec: expr=[k@2 ASC NULLS LAST], REDACTED |_|_MergeScanExec: REDACTED |_|_| | Plan with Full Metrics | PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j], REDACTED -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], REDACTED |_|_PromSeriesDivideExec: tags=["k"], REDACTED |_|_SortExec: expr=[k@2 ASC NULLS LAST], REDACTED diff --git a/tests/cases/standalone/common/tql-explain-analyze/explain.result b/tests/cases/standalone/common/tql-explain-analyze/explain.result index 2666a1a4d7..eb6be427d3 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/explain.result +++ b/tests/cases/standalone/common/tql-explain-analyze/explain.result @@ -20,11 +20,10 @@ TQL EXPLAIN (0, 10, '5s') test; | | PromSeriesDivide: tags=["k"] | | | MergeScan [is_placeholder=false] | | physical_plan | PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j] | 
-| | RepartitionExec: partitioning=REDACTED -| | PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false] | -| | PromSeriesDivideExec: tags=["k"] | -| | SortExec: expr=[k@2 ASC NULLS LAST] | -| | MergeScanExec: REDACTED +| | PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false] | +| | PromSeriesDivideExec: tags=["k"] | +| | SortExec: expr=[k@2 ASC NULLS LAST] | +| | MergeScanExec: REDACTED | | | +---------------+-----------------------------------------------------------------------------------------------+ @@ -42,11 +41,10 @@ TQL EXPLAIN (0, 10, '1s', '2s') test; | | PromSeriesDivide: tags=["k"] | | | MergeScan [is_placeholder=false] | | physical_plan | PromInstantManipulateExec: range=[0..0], lookback=[2000], interval=[300000], time index=[j] | -| | RepartitionExec: partitioning=REDACTED -| | PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false] | -| | PromSeriesDivideExec: tags=["k"] | -| | SortExec: expr=[k@2 ASC NULLS LAST] | -| | MergeScanExec: REDACTED +| | PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false] | +| | PromSeriesDivideExec: tags=["k"] | +| | SortExec: expr=[k@2 ASC NULLS LAST] | +| | MergeScanExec: REDACTED | | | +---------------+---------------------------------------------------------------------------------------------+ @@ -63,11 +61,10 @@ TQL EXPLAIN ('1970-01-01T00:00:00'::timestamp, '1970-01-01T00:00:00'::timestamp | | PromSeriesDivide: tags=["k"] | | | MergeScan [is_placeholder=false] | | physical_plan | PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j] | -| | RepartitionExec: partitioning=REDACTED -| | PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false] | -| | PromSeriesDivideExec: tags=["k"] | -| | SortExec: expr=[k@2 ASC NULLS LAST] | -| | MergeScanExec: REDACTED +| | PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false] | +| | PromSeriesDivideExec: tags=["k"] | +| | SortExec: expr=[k@2 ASC NULLS LAST] | +| | MergeScanExec: REDACTED | | | +---------------+-----------------------------------------------------------------------------------------------+ @@ -86,8 +83,9 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; |_|_PromSeriesNormalize: offset=[0], time index=[j], filter NaN: [false]_| |_|_PromSeriesDivide: tags=["k"]_| |_|_Sort: test.k DESC NULLS LAST, test.j DESC NULLS LAST_| -|_|_Filter: test.j >= TimestampMillisecond(-300000, None) AND test.j <= TimestampMillisecond(300000, None) | +|_|_Filter: test.j >= TimestampMillisecond(-300000, None) AND test.j <= TimestampMillisecond(300000, None)_| |_|_TableScan: test_| +| logical_plan after apply_function_rewrites_| SAME TEXT AS ABOVE_| | logical_plan after count_wildcard_rule_| SAME TEXT AS ABOVE_| | logical_plan after StringNormalizationRule_| SAME TEXT AS ABOVE_| | logical_plan after inline_table_scan_| SAME TEXT AS ABOVE_| @@ -106,7 +104,6 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | logical_plan after scalar_subquery_to_join_| SAME TEXT AS ABOVE_| | logical_plan after extract_equijoin_predicate_| SAME TEXT AS ABOVE_| | logical_plan after simplify_expressions_| SAME TEXT AS ABOVE_| -| logical_plan after merge_projection_| SAME TEXT AS ABOVE_| | logical_plan after rewrite_disjunctive_predicate_| SAME TEXT AS ABOVE_| | logical_plan after eliminate_duplicated_expr_| SAME TEXT AS ABOVE_| | logical_plan after eliminate_filter_| SAME TEXT AS ABOVE_| @@ -123,9 +120,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | logical_plan after simplify_expressions_| SAME TEXT AS 
ABOVE_| | logical_plan after unwrap_cast_in_comparison_| SAME TEXT AS ABOVE_| | logical_plan after common_sub_expression_eliminate_| SAME TEXT AS ABOVE_| -| logical_plan after push_down_projection_| SAME TEXT AS ABOVE_| -| logical_plan after eliminate_projection_| SAME TEXT AS ABOVE_| -| logical_plan after push_down_limit_| SAME TEXT AS ABOVE_| +| logical_plan after optimize_projections_| SAME TEXT AS ABOVE_| | logical_plan after OrderHintRule_| SAME TEXT AS ABOVE_| | logical_plan_| PromInstantManipulate: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| |_|_PromSeriesNormalize: offset=[0], time index=[j], filter NaN: [false]_| @@ -136,6 +131,11 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; |_|_PromSeriesDivideExec: tags=["k"]_| |_|_MergeScanExec: REDACTED |_|_| +| initial_physical_plan_with_stats_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j], statistics=[Rows=Inexact(0), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] | +|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| +|_|_PromSeriesDivideExec: tags=["k"], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| +|_|_MergeScanExec: REDACTED +|_|_| | physical_plan after OutputRequirements_| OutputRequirementExec_| |_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false]_| @@ -144,25 +144,19 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; |_|_| | physical_plan after aggregate_statistics_| SAME TEXT AS ABOVE_| | physical_plan after join_selection_| SAME TEXT AS ABOVE_| -| physical_plan after EnforceDistribution_| OutputRequirementExec_| -|_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_RepartitionExec: partitioning=REDACTED -|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_MergeScanExec: REDACTED -|_|_| +| physical_plan after LimitedDistinctAggregation_| SAME TEXT AS ABOVE_| +| physical_plan after EnforceDistribution_| SAME TEXT AS ABOVE_| | physical_plan after CombinePartialFinalAggregate_| SAME TEXT AS ABOVE_| | physical_plan after EnforceSorting_| OutputRequirementExec_| |_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false]_| |_|_PromSeriesDivideExec: tags=["k"]_| |_|_SortExec: expr=[k@2 ASC NULLS LAST]_| |_|_MergeScanExec: REDACTED |_|_| +| physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan after coalesce_batches_| SAME TEXT AS ABOVE_| | physical_plan after OutputRequirements_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false]_| |_|_PromSeriesDivideExec: tags=["k"]_| |_|_SortExec: expr=[k@2 ASC NULLS LAST]_| @@ -170,13 +164,19 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; |_|_| | physical_plan after PipelineChecker_| SAME TEXT AS ABOVE_| | physical_plan after LimitAggregation_| SAME TEXT AS ABOVE_| +| physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time 
index=[j]_| -|_|_RepartitionExec: partitioning=REDACTED |_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false]_| |_|_PromSeriesDivideExec: tags=["k"]_| |_|_SortExec: expr=[k@2 ASC NULLS LAST]_| |_|_MergeScanExec: REDACTED |_|_| +| physical_plan_with_stats_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j], statistics=[Rows=Inexact(0), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] | +|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [false], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| +|_|_PromSeriesDivideExec: tags=["k"], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| +|_|_SortExec: expr=[k@2 ASC NULLS LAST], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| +|_|_MergeScanExec: REDACTED +|_|_| +-+-+ DROP TABLE test; diff --git a/tests/cases/standalone/common/types/decimal/decimal_arithmetic.result b/tests/cases/standalone/common/types/decimal/decimal_arithmetic.result index 9be450cc7f..28a5388ca3 100644 --- a/tests/cases/standalone/common/types/decimal/decimal_arithmetic.result +++ b/tests/cases/standalone/common/types/decimal/decimal_arithmetic.result @@ -69,7 +69,7 @@ SELECT ('0.5'::DECIMAL(1,1) + 10000)::VARCHAR, -- out of range SELECT ('0.54321543215432154321543215432154321'::DECIMAL(35,35) + 10000)::VARCHAR; -Error: 3001(EngineExecuteQuery), DataFusion error: Compute error: Overflow happened on: 10000 * 100000000000000000000000000000000000 +Error: 3001(EngineExecuteQuery), Compute error: Overflow happened on: 10000 * 100000000000000000000000000000000000 -- different types SELECT '0.5'::DECIMAL(1,1) + 1::TINYINT, diff --git a/tests/cases/standalone/common/types/decimal/decimal_cast.result b/tests/cases/standalone/common/types/decimal/decimal_cast.result index 0e8de7db67..5f9bb7955e 100644 --- a/tests/cases/standalone/common/types/decimal/decimal_cast.result +++ b/tests/cases/standalone/common/types/decimal/decimal_cast.result @@ -12,19 +12,19 @@ SELECT 127::DECIMAL(3,0)::TINYINT, -127::DECIMAL(3,0)::TINYINT, -7::DECIMAL(9,1) SELECT 128::DECIMAL(3,0)::TINYINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 128 is out of range Int8 +Error: 3001(EngineExecuteQuery), Cast error: value of 128 is out of range Int8 SELECT -128::DECIMAL(9,0)::TINYINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 128 is out of range Int8 +Error: 3001(EngineExecuteQuery), Cast error: value of 128 is out of range Int8 SELECT 128::DECIMAL(18,0)::TINYINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 128 is out of range Int8 +Error: 3001(EngineExecuteQuery), Cast error: value of 128 is out of range Int8 SELECT 14751947891758972421513::DECIMAL(38,0)::TINYINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 14751947891758971486208 is out of range Int8 +Error: 3001(EngineExecuteQuery), Cast error: value of 14751947891758971486208 is out of range Int8 -- smallint SELECT 127::DECIMAL(3,0)::SMALLINT, -32767::DECIMAL(5,0)::SMALLINT, -7::DECIMAL(9,1)::SMALLINT, 27::DECIMAL(18,1)::SMALLINT, 33::DECIMAL(38,1)::SMALLINT; @@ -37,15 +37,15 @@ SELECT 127::DECIMAL(3,0)::SMALLINT, -32767::DECIMAL(5,0)::SMALLINT, -7::DECIMAL( SELECT -32768::DECIMAL(9,0)::SMALLINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 32768 is out of range Int16 +Error: 3001(EngineExecuteQuery), Cast error: value of 32768 is out of range Int16 SELECT 
32768::DECIMAL(18,0)::SMALLINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 32768 is out of range Int16 +Error: 3001(EngineExecuteQuery), Cast error: value of 32768 is out of range Int16 SELECT 14751947891758972421513::DECIMAL(38,0)::SMALLINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 14751947891758971486208 is out of range Int16 +Error: 3001(EngineExecuteQuery), Cast error: value of 14751947891758971486208 is out of range Int16 -- integer SELECT 127::DECIMAL(3,0)::INTEGER, -2147483647::DECIMAL(10,0)::INTEGER, -7::DECIMAL(9,1)::INTEGER, 27::DECIMAL(18,1)::INTEGER, 33::DECIMAL(38,1)::INTEGER; @@ -58,11 +58,11 @@ SELECT 127::DECIMAL(3,0)::INTEGER, -2147483647::DECIMAL(10,0)::INTEGER, -7::DECI SELECT 2147483648::DECIMAL(18,0)::INTEGER; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 2147483648 is out of range Int32 +Error: 3001(EngineExecuteQuery), Cast error: value of 2147483648 is out of range Int32 SELECT 14751947891758972421513::DECIMAL(38,0)::INTEGER; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 14751947891758971486208 is out of range Int32 +Error: 3001(EngineExecuteQuery), Cast error: value of 14751947891758971486208 is out of range Int32 -- bigint SELECT 127::DECIMAL(3,0)::BIGINT, -9223372036854775807::DECIMAL(19,0)::BIGINT, -7::DECIMAL(9,1)::BIGINT, 27::DECIMAL(18,1)::BIGINT, 33::DECIMAL(38,1)::BIGINT; @@ -75,7 +75,7 @@ SELECT 127::DECIMAL(3,0)::BIGINT, -9223372036854775807::DECIMAL(19,0)::BIGINT, - SELECT 14751947891758972421513::DECIMAL(38,0)::BIGINT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: value of 14751947891758971486208 is out of range Int64 +Error: 3001(EngineExecuteQuery), Cast error: value of 14751947891758971486208 is out of range Int64 -- float SELECT 127::DECIMAL(3,0)::FLOAT, -17014118346046923173168730371588410572::DECIMAL(38,0)::FLOAT, -7::DECIMAL(9,1)::FLOAT, 27::DECIMAL(18,1)::FLOAT, 33::DECIMAL(38,1)::FLOAT; @@ -99,32 +99,32 @@ SELECT 127::DECIMAL(3,0)::DOUBLE, -17014118346046923173168730371588410572::DECIM -- tinyint SELECT 100::TINYINT::DECIMAL(18,3), 200::TINYINT::DECIMAL(3,0), (-300)::TINYINT::DECIMAL(3,0), 0::TINYINT::DECIMAL(3,3); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value 200 to type Int8 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value 200 to type Int8 SELECT 100::TINYINT::DECIMAL(38,35), 200::TINYINT::DECIMAL(9,6); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value 200 to type Int8 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value 200 to type Int8 -- overflow SELECT 100::TINYINT::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 1::TINYINT::DECIMAL(3,3); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 100::TINYINT::DECIMAL(18,17); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. 
Max is 999999999999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 100::TINYINT::DECIMAL(9,7); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 100::TINYINT::DECIMAL(38,37); -Error: 3001(EngineExecuteQuery), DataFusion error: Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 +Error: 3001(EngineExecuteQuery), Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 -- smallint SELECT 100::SMALLINT::DECIMAL(18,3), 200::SMALLINT::DECIMAL(3,0), (-300)::SMALLINT::DECIMAL(3,0), 0::SMALLINT::DECIMAL(3,3); @@ -146,23 +146,23 @@ SELECT 100::SMALLINT::DECIMAL(38,35), 200::SMALLINT::DECIMAL(9,6); -- overflow SELECT 100::SMALLINT::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 1::SMALLINT::DECIMAL(3,3); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 100::SMALLINT::DECIMAL(18,17); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 100::SMALLINT::DECIMAL(9,7); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 100::SMALLINT::DECIMAL(38,37); -Error: 3001(EngineExecuteQuery), DataFusion error: Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 +Error: 3001(EngineExecuteQuery), Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 -- integer SELECT 100::INTEGER::DECIMAL(18,3), 200::INTEGER::DECIMAL(3,0), (-300)::INTEGER::DECIMAL(3,0), 0::INTEGER::DECIMAL(3,3); @@ -184,31 +184,31 @@ SELECT 100::INTEGER::DECIMAL(38,35), 200::INTEGER::DECIMAL(9,6), 2147483647::INT -- overflow SELECT 100::INTEGER::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 10000000::INTEGER::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. 
Max is 999 SELECT -10000000::INTEGER::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 1::INTEGER::DECIMAL(3,3); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 100::INTEGER::DECIMAL(18,17); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 100::INTEGER::DECIMAL(9,7); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 100::INTEGER::DECIMAL(38,37); -Error: 3001(EngineExecuteQuery), DataFusion error: Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 +Error: 3001(EngineExecuteQuery), Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 -- bigint SELECT 100::BIGINT::DECIMAL(18,3), 200::BIGINT::DECIMAL(3,0), (-100)::BIGINT::DECIMAL(3,0), 0::BIGINT::DECIMAL(3,3); @@ -238,31 +238,31 @@ SELECT 922337203685477580::BIGINT::DECIMAL(18,0), (-922337203685477580)::BIGINT: -- overflow SELECT 100::BIGINT::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 10000000::BIGINT::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT -10000000::BIGINT::DECIMAL(3,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 1::BIGINT::DECIMAL(3,3); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 100::BIGINT::DECIMAL(18,17); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. 
Max is 999999999999999999 SELECT 100::BIGINT::DECIMAL(9,7); -Error: 3001(EngineExecuteQuery), DataFusion error: Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 100::BIGINT::DECIMAL(38,37); -Error: 3001(EngineExecuteQuery), DataFusion error: Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 +Error: 3001(EngineExecuteQuery), Compute error: Overflow happened on: 100 * 10000000000000000000000000000000000000 -- float SELECT 100::FLOAT::DECIMAL(18,3), 200::FLOAT::DECIMAL(3,0), (-300)::FLOAT::DECIMAL(3,0), 0::FLOAT::DECIMAL(3,3); @@ -292,92 +292,52 @@ SELECT 1.25::FLOAT::DECIMAL(3,2); -- overflow SELECT 100::FLOAT::DECIMAL(3,1); -+------------+ -| Int64(100) | -+------------+ -| 10.0 | -+------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 10000000::FLOAT::DECIMAL(3,1); -+-----------------+ -| Int64(10000000) | -+-----------------+ -| 10.0 | -+-----------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT -10000000::FLOAT::DECIMAL(3,1); -+---------------------+ -| (- Int64(10000000)) | -+---------------------+ -| -10.0 | -+---------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 1::FLOAT::DECIMAL(3,3); -+----------+ -| Int64(1) | -+----------+ -| .100 | -+----------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 100::FLOAT::DECIMAL(18,17); -+---------------------+ -| Int64(100) | -+---------------------+ -| 1.00000000000000000 | -+---------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 100::FLOAT::DECIMAL(9,7); -+------------+ -| Int64(100) | -+------------+ -| 10.0000000 | -+------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 100::FLOAT::DECIMAL(38,37); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 37). Overflowing on 100.0 +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 37). Overflowing on 100.0 -- Some controversial cases SELECT 17014118346046923173168730371588410572::FLOAT::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on 1.7014119e37 +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on 1.7014119e37 SELECT 17014118346046923173168730371588410572::FLOAT::DECIMAL(37,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 1701411859957704321881461067092905164 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118599577043218814610670929051648 is too large to store in a Decimal128 of precision 37. 
Max is 9999999999999999999999999999999999999 SELECT 17014118346046923173168730371588410572::FLOAT::DECIMAL(18,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 170141185995770432 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118599577043218814610670929051648 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 17014118346046923173168730371588410572::FLOAT::DECIMAL(9,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 170141185 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118599577043218814610670929051648 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 17014118346046923173168730371588410572::FLOAT::DECIMAL(4,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 1701 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118599577043218814610670929051648 is too large to store in a Decimal128 of precision 4. Max is 9999 -- double SELECT 100::DOUBLE::DECIMAL(18,3), 200::DOUBLE::DECIMAL(3,0), (-300)::DOUBLE::DECIMAL(3,0), 0::DOUBLE::DECIMAL(3,3); @@ -407,90 +367,50 @@ SELECT 1.25::DOUBLE::DECIMAL(3,2); -- overflow SELECT 100::DOUBLE::DECIMAL(3,1); -+------------+ -| Int64(100) | -+------------+ -| 10.0 | -+------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 10000000::DOUBLE::DECIMAL(3,1); -+-----------------+ -| Int64(10000000) | -+-----------------+ -| 10.0 | -+-----------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT -10000000::DOUBLE::DECIMAL(3,1); -+---------------------+ -| (- Int64(10000000)) | -+---------------------+ -| -10.0 | -+---------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 100000000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 1::DOUBLE::DECIMAL(3,3); -+----------+ -| Int64(1) | -+----------+ -| .100 | -+----------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999 SELECT 100::DOUBLE::DECIMAL(18,17); -+---------------------+ -| Int64(100) | -+---------------------+ -| 1.00000000000000000 | -+---------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 10000000000000000000 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 100::DOUBLE::DECIMAL(9,7); -+------------+ -| Int64(100) | -+------------+ -| 10.0000000 | -+------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 1000000000 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 100::DOUBLE::DECIMAL(38,37); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 37). Overflowing on 100.0 +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 37). 
Overflowing on 100.0 -- Some controversial cases SELECT 17014118346046923173168730371588410572::DOUBLE::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on 1.7014118346046924e37 +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on 1.7014118346046924e37 SELECT 17014118346046923173168730371588410572::DOUBLE::DECIMAL(37,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 1701411834604692411764202694551745331 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118346046924117642026945517453312 is too large to store in a Decimal128 of precision 37. Max is 9999999999999999999999999999999999999 SELECT 17014118346046923173168730371588410572::DOUBLE::DECIMAL(18,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 170141183460469241 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118346046924117642026945517453312 is too large to store in a Decimal128 of precision 18. Max is 999999999999999999 SELECT 17014118346046923173168730371588410572::DOUBLE::DECIMAL(9,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 170141183 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118346046924117642026945517453312 is too large to store in a Decimal128 of precision 9. Max is 999999999 SELECT 17014118346046923173168730371588410572::DOUBLE::DECIMAL(4,0); -+-------------------------------------------------+ -| Float64(17014118346046924000000000000000000000) | -+-------------------------------------------------+ -| 1701 | -+-------------------------------------------------+ +Error: 3001(EngineExecuteQuery), Invalid argument error: 17014118346046924117642026945517453312 is too large to store in a Decimal128 of precision 4. 
Max is 9999 diff --git a/tests/cases/standalone/common/types/decimal/decimal_ops.result b/tests/cases/standalone/common/types/decimal/decimal_ops.result index 3081f3811c..35de1a1d27 100644 --- a/tests/cases/standalone/common/types/decimal/decimal_ops.result +++ b/tests/cases/standalone/common/types/decimal/decimal_ops.result @@ -124,7 +124,7 @@ SELECT CEIL('0.1'::DECIMAL), CEIL('-0.1'::DECIMAL), CEIL(NULL::DECIMAL); +-------------------+--------------------+------------+ | ceil(Utf8("0.1")) | ceil(Utf8("-0.1")) | ceil(NULL) | +-------------------+--------------------+------------+ -| 1.0 | 0.0 | | +| 1.0 | -0.0 | | +-------------------+--------------------+------------+ SELECT CEIL('100.3'::DECIMAL), CEIL('-127012.3'::DECIMAL); @@ -149,7 +149,7 @@ SELECT CEIL('999.9'::DECIMAL(4,1)), CEIL('99999999.9'::DECIMAL(9,1)), CEIL('9999 +---------------------+--------------------------+-----------------------------------+-------------------------------------------------------+ | ceil(Utf8("999.9")) | ceil(Utf8("99999999.9")) | ceil(Utf8("99999999999999999.9")) | ceil(Utf8("9999999999999999999999999999999999999.9")) | +---------------------+--------------------------+-----------------------------------+-------------------------------------------------------+ -| 1000.0 | 100000000.0 | 1.0e17 | 1.0e37 | +| 1000.0 | 100000000.0 | 1e17 | 1e37 | +---------------------+--------------------------+-----------------------------------+-------------------------------------------------------+ SELECT CEIL('-999.9'::DECIMAL(4,1)), CEIL('-99999999.9'::DECIMAL(9,1)), CEIL('-99999999999999999.9'::DECIMAL(18,1)), CEIL('-9999999999999999999999999999999999999.9'::DECIMAL(38,1)); @@ -157,7 +157,7 @@ SELECT CEIL('-999.9'::DECIMAL(4,1)), CEIL('-99999999.9'::DECIMAL(9,1)), CEIL('-9 +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ | ceil(Utf8("-999.9")) | ceil(Utf8("-99999999.9")) | ceil(Utf8("-99999999999999999.9")) | ceil(Utf8("-9999999999999999999999999999999999999.9")) | +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ -| -999.0 | -99999999.0 | -1.0e17 | -1.0e37 | +| -999.0 | -99999999.0 | -1e17 | -1e37 | +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ -- test FLOOR function @@ -191,7 +191,7 @@ SELECT FLOOR('999.9'::DECIMAL(4,1)), FLOOR('99999999.9'::DECIMAL(9,1)), FLOOR('9 +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ | floor(Utf8("999.9")) | floor(Utf8("99999999.9")) | floor(Utf8("99999999999999999.9")) | floor(Utf8("9999999999999999999999999999999999999.9")) | +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ -| 999.0 | 99999999.0 | 1.0e17 | 1.0e37 | +| 999.0 | 99999999.0 | 1e17 | 1e37 | +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ SELECT FLOOR('-999.9'::DECIMAL(4,1)), FLOOR('-99999999.9'::DECIMAL(9,1)), FLOOR('-99999999999999999.9'::DECIMAL(18,1)), FLOOR('-9999999999999999999999999999999999999.9'::DECIMAL(38,1)); @@ -199,7 +199,7 @@ SELECT FLOOR('-999.9'::DECIMAL(4,1)), FLOOR('-99999999.9'::DECIMAL(9,1)), FLOOR( 
+-----------------------+----------------------------+-------------------------------------+---------------------------------------------------------+ | floor(Utf8("-999.9")) | floor(Utf8("-99999999.9")) | floor(Utf8("-99999999999999999.9")) | floor(Utf8("-9999999999999999999999999999999999999.9")) | +-----------------------+----------------------------+-------------------------------------+---------------------------------------------------------+ -| -1000.0 | -100000000.0 | -1.0e17 | -1.0e37 | +| -1000.0 | -100000000.0 | -1e17 | -1e37 | +-----------------------+----------------------------+-------------------------------------+---------------------------------------------------------+ -- test unary ROUND function @@ -208,7 +208,7 @@ SELECT ROUND('0.1'::DECIMAL), ROUND('-0.1'::DECIMAL), ROUND(NULL::DECIMAL); +--------------------+---------------------+-------------+ | round(Utf8("0.1")) | round(Utf8("-0.1")) | round(NULL) | +--------------------+---------------------+-------------+ -| 0.0 | 0.0 | | +| 0.0 | -0.0 | | +--------------------+---------------------+-------------+ SELECT ROUND('100.3'::DECIMAL), ROUND('-127012.3'::DECIMAL); @@ -233,7 +233,7 @@ SELECT ROUND('999.9'::DECIMAL(4,1)), ROUND('99999999.9'::DECIMAL(9,1)), ROUND('9 +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ | round(Utf8("999.9")) | round(Utf8("99999999.9")) | round(Utf8("99999999999999999.9")) | round(Utf8("9999999999999999999999999999999999999.9")) | +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ -| 1000.0 | 100000000.0 | 1.0e17 | 1.0e37 | +| 1000.0 | 100000000.0 | 1e17 | 1e37 | +----------------------+---------------------------+------------------------------------+--------------------------------------------------------+ SELECT ROUND('-999.9'::DECIMAL(4,1)), ROUND('-99999999.9'::DECIMAL(9,1)), ROUND('-99999999999999999.9'::DECIMAL(18,1)), ROUND('-9999999999999999999999999999999999999.9'::DECIMAL(38,1)); @@ -241,7 +241,7 @@ SELECT ROUND('-999.9'::DECIMAL(4,1)), ROUND('-99999999.9'::DECIMAL(9,1)), ROUND( +-----------------------+----------------------------+-------------------------------------+---------------------------------------------------------+ | round(Utf8("-999.9")) | round(Utf8("-99999999.9")) | round(Utf8("-99999999999999999.9")) | round(Utf8("-9999999999999999999999999999999999999.9")) | +-----------------------+----------------------------+-------------------------------------+---------------------------------------------------------+ -| -1000.0 | -100000000.0 | -1.0e17 | -1.0e37 | +| -1000.0 | -100000000.0 | -1e17 | -1e37 | +-----------------------+----------------------------+-------------------------------------+---------------------------------------------------------+ -- round with precision @@ -340,7 +340,7 @@ SELECT ROUND('-1049578239572094512.32415'::DECIMAL(30,10), 0)::VARCHAR, 
+----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+---------------------------------------------------------+ | round(Utf8("-1049578239572094512.32415"),Int64(0)) | round(Utf8("-1049578239572094512.32415"),Int64(-1)) | round(Utf8("-1049578239572094512.32415"),Int64(-2)) | round(Utf8("-1049578239572094512.32415"),Int64(-3)) | round(Utf8("-1049578239572094512.32415"),Int64(-4)) | round(Utf8("-1049578239572094512.32415"),Int64(-5)) | round(Utf8("-1049578239572094512.32415"),Int64(-6)) | round(Utf8("-1049578239572094512.32415"),Int64(-7)) | round(Utf8("-1049578239572094512.32415"),Int64(-8)) | round(Utf8("-1049578239572094512.32415"),Int64(-9)) | round(Utf8("-1049578239572094512.32415"),Int64(-10)) | round(Utf8("-1049578239572094512.32415"),Int64(-11)) | round(Utf8("-1049578239572094512.32415"),Int64(-12)) | round(Utf8("-1049578239572094512.32415"),Int64(-13)) | round(Utf8("-1049578239572094512.32415"),Int64(-14)) | round(Utf8("-1049578239572094512.32415"),Int64(-15)) | round(Utf8("-1049578239572094512.32415"),Int64(-16)) | round(Utf8("-1049578239572094512.32415"),Int64(-18)) | round(Utf8("-1049578239572094512.32415"),Int64(-19)) | round(Utf8("-1049578239572094512.32415"),Int64(-20)) | round(Utf8("-1049578239572094512.32415"),Int64(-19842)) | +----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+---------------------------------------------------------+ -| -1.0495782395720946e18 | -1.0495782395720947e18 | -1.0495782395720946e18 | -1.049578239572095e18 | 
-1.04957823957209e18 | -1.0495782395721e18 | -1.049578239572e18 | -1.04957823957e18 | -1.0495782396e18 | -1.0495782399999999e18 | -1.04957824e18 | -1.0495782e18 | -1.049578e18 | -1.04958e18 | -1.0496e18 | -1.0499999999999999e18 | -1.05e18 | -9.999999999999999e17 | 0.0 | 0.0 | NaN | +| -1.0495782395720946e18 | -1.0495782395720947e18 | -1.0495782395720946e18 | -1.049578239572095e18 | -1.04957823957209e18 | -1.0495782395721e18 | -1.049578239572e18 | -1.04957823957e18 | -1.0495782396e18 | -1.0495782399999999e18 | -1.04957824e18 | -1.0495782e18 | -1.049578e18 | -1.04958e18 | -1.0496e18 | -1.0499999999999999e18 | -1.05e18 | -9.999999999999999e17 | -0.0 | -0.0 | NaN | +----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+-----------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+------------------------------------------------------+---------------------------------------------------------+ SELECT ROUND(12::DECIMAL(3,0)); diff --git a/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.result b/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.result index 68af47cf58..0c05be4492 100644 --- a/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.result +++ b/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.result @@ -30,14 +30,12 @@ select '1.234499999'::DECIMAL(5,4); | 1.2345 | +---------------------+ --- arrow-rs is a little strange about the conversion behavior of negative numbers. 
--- issue: https://github.com/apache/arrow-datafusion/issues/8326 select '-1.023450000001'::DECIMAL(5,4); +-------------------------+ | Utf8("-1.023450000001") | +-------------------------+ -| -0.9765 | +| -1.0235 | +-------------------------+ select '-1.234499999'::DECIMAL(4,3); @@ -45,7 +43,7 @@ select '-1.234499999'::DECIMAL(4,3); +----------------------+ | Utf8("-1.234499999") | +----------------------+ -| -0.766 | +| -1.234 | +----------------------+ select '-1.23499999'::DECIMAL(4,3); @@ -53,7 +51,7 @@ select '-1.23499999'::DECIMAL(4,3); +---------------------+ | Utf8("-1.23499999") | +---------------------+ -| -0.765 | +| -1.235 | +---------------------+ select '-1.234499999'::DECIMAL(5,4); @@ -61,6 +59,6 @@ select '-1.234499999'::DECIMAL(5,4); +----------------------+ | Utf8("-1.234499999") | +----------------------+ -| -0.7655 | +| -1.2345 | +----------------------+ diff --git a/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.sql b/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.sql index 89fd28a385..547054c322 100644 --- a/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.sql +++ b/tests/cases/standalone/common/types/decimal/decimal_small_precision_behavior.sql @@ -6,8 +6,6 @@ select '1.23499999'::DECIMAL(4,3); select '1.234499999'::DECIMAL(5,4); --- arrow-rs is a little strange about the conversion behavior of negative numbers. --- issue: https://github.com/apache/arrow-datafusion/issues/8326 select '-1.023450000001'::DECIMAL(5,4); select '-1.234499999'::DECIMAL(4,3); diff --git a/tests/cases/standalone/common/types/decimal/decimal_table.result b/tests/cases/standalone/common/types/decimal/decimal_table.result index aaf2a08450..03f04182a0 100644 --- a/tests/cases/standalone/common/types/decimal/decimal_table.result +++ b/tests/cases/standalone/common/types/decimal/decimal_table.result @@ -19,7 +19,7 @@ SELECT d + 1000000000000000.0 FROM decimals; +----------------------------------------+ | decimals.d + Float64(1000000000000000) | +----------------------------------------+ -| 1.0e17 | +| 1e17 | +----------------------------------------+ SELECT -1 - d FROM decimals; @@ -35,7 +35,7 @@ SELECT -1000000000000000.0 - d FROM decimals; +-----------------------------------------+ | Float64(-1000000000000000) - decimals.d | +-----------------------------------------+ -| -1.0e17 | +| -1e17 | +-----------------------------------------+ SELECT 1 * d FROM decimals; diff --git a/tests/cases/standalone/common/types/float/nan_aggregate.result b/tests/cases/standalone/common/types/float/nan_aggregate.result index 50958986ee..77636174c5 100644 --- a/tests/cases/standalone/common/types/float/nan_aggregate.result +++ b/tests/cases/standalone/common/types/float/nan_aggregate.result @@ -104,14 +104,14 @@ Affected Rows: 4 SELECT * FROM floats_doubles; -+---------+---------+-------------------------+ -| f | d | ts | -+---------+---------+-------------------------+ -| 2.0e38 | 1.0e308 | 1970-01-01T00:00:00.001 | -| 2.0e38 | 1.0e308 | 1970-01-01T00:00:00.002 | -| -1.0e38 | 0.0 | 1970-01-01T00:00:00.003 | -| -1.0e38 | 0.0 | 1970-01-01T00:00:00.004 | -+---------+---------+-------------------------+ ++-------+-------+-------------------------+ +| f | d | ts | ++-------+-------+-------------------------+ +| 2e38 | 1e308 | 1970-01-01T00:00:00.001 | +| 2e38 | 1e308 | 1970-01-01T00:00:00.002 | +| -1e38 | 0.0 | 1970-01-01T00:00:00.003 | +| -1e38 | 0.0 | 1970-01-01T00:00:00.004 | ++-------+-------+-------------------------+ -- not 
out of range -- SELECT SUM(f) FROM floats_doubles WHERE f > 0; diff --git a/tests/cases/standalone/common/types/float/nan_cast.result b/tests/cases/standalone/common/types/float/nan_cast.result index 585981f449..927aabf071 100644 --- a/tests/cases/standalone/common/types/float/nan_cast.result +++ b/tests/cases/standalone/common/types/float/nan_cast.result @@ -11,23 +11,23 @@ SELECT 'nan'::FLOAT; -- cannot cast nan, inf or -inf to these types SELECT 'nan'::FLOAT::INT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value NaN to type Int32 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value NaN to type Int32 SELECT 'nan'::FLOAT::DECIMAL(4,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(4, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(4, 1). Overflowing on NaN SELECT 'nan'::FLOAT::DECIMAL(9,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(9, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(9, 1). Overflowing on NaN SELECT 'nan'::FLOAT::DECIMAL(18,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(18, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(18, 1). Overflowing on NaN SELECT 'nan'::FLOAT::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on NaN SELECT 'inf'::FLOAT; @@ -40,23 +40,23 @@ SELECT 'inf'::FLOAT; -- cannot cast nan, inf or -inf to these types SELECT 'inf'::FLOAT::INT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value inf to type Int32 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value inf to type Int32 SELECT 'inf'::FLOAT::DECIMAL(4,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(4, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(4, 1). Overflowing on inf SELECT 'inf'::FLOAT::DECIMAL(9,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(9, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(9, 1). Overflowing on inf SELECT 'inf'::FLOAT::DECIMAL(18,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(18, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(18, 1). Overflowing on inf SELECT 'inf'::FLOAT::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on inf SELECT '-inf'::FLOAT; @@ -69,23 +69,23 @@ SELECT '-inf'::FLOAT; -- cannot cast nan, inf or -inf to these types SELECT '-inf'::FLOAT::INT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value -inf to type Int32 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value -inf to type Int32 SELECT '-inf'::FLOAT::DECIMAL(4,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(4, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(4, 1). 
Overflowing on -inf SELECT '-inf'::FLOAT::DECIMAL(9,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(9, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(9, 1). Overflowing on -inf SELECT '-inf'::FLOAT::DECIMAL(18,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(18, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(18, 1). Overflowing on -inf SELECT '-inf'::FLOAT::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on -inf SELECT 'nan'::DOUBLE; @@ -98,23 +98,23 @@ SELECT 'nan'::DOUBLE; -- cannot cast nan, inf or -inf to these types SELECT 'nan'::DOUBLE::INT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value NaN to type Int32 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value NaN to type Int32 SELECT 'nan'::DOUBLE::DECIMAL(4,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(4, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(4, 1). Overflowing on NaN SELECT 'nan'::DOUBLE::DECIMAL(9,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(9, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(9, 1). Overflowing on NaN SELECT 'nan'::DOUBLE::DECIMAL(18,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(18, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(18, 1). Overflowing on NaN SELECT 'nan'::DOUBLE::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on NaN +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on NaN SELECT 'inf'::DOUBLE; @@ -127,23 +127,23 @@ SELECT 'inf'::DOUBLE; -- cannot cast nan, inf or -inf to these types SELECT 'inf'::DOUBLE::INT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value inf to type Int32 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value inf to type Int32 SELECT 'inf'::DOUBLE::DECIMAL(4,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(4, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(4, 1). Overflowing on inf SELECT 'inf'::DOUBLE::DECIMAL(9,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(9, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(9, 1). Overflowing on inf SELECT 'inf'::DOUBLE::DECIMAL(18,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(18, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(18, 1). Overflowing on inf SELECT 'inf'::DOUBLE::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). 
Overflowing on inf SELECT '-inf'::DOUBLE; @@ -156,23 +156,23 @@ SELECT '-inf'::DOUBLE; -- cannot cast nan, inf or -inf to these types SELECT '-inf'::DOUBLE::INT; -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Can't cast value -inf to type Int32 +Error: 3001(EngineExecuteQuery), Cast error: Can't cast value -inf to type Int32 SELECT '-inf'::DOUBLE::DECIMAL(4,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(4, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(4, 1). Overflowing on -inf SELECT '-inf'::DOUBLE::DECIMAL(9,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(9, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(9, 1). Overflowing on -inf SELECT '-inf'::DOUBLE::DECIMAL(18,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(18, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(18, 1). Overflowing on -inf SELECT '-inf'::DOUBLE::DECIMAL(38,1); -Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast to Decimal128(38, 1). Overflowing on -inf +Error: 3001(EngineExecuteQuery), Cast error: Cannot cast to Decimal128(38, 1). Overflowing on -inf -- we can cast nan, inf and -inf between floats and doubles, as well as to/from strings SELECT 'nan'::FLOAT::VARCHAR; diff --git a/tests/cases/standalone/common/types/timestamp/timestamp.result b/tests/cases/standalone/common/types/timestamp/timestamp.result index 2c21d1390c..91bd372303 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp.result +++ b/tests/cases/standalone/common/types/timestamp/timestamp.result @@ -32,15 +32,15 @@ SELECT timestamp ' 2017-07-23 13:10:11 '; SELECT timestamp ' 2017-07-23 13:10:11 AA'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Invalid timezone "AA": 'AA' is not a valid timezone +Error: 3001(EngineExecuteQuery), Parser error: Invalid timezone "AA": 'AA' is not a valid timezone SELECT timestamp 'AA2017-07-23 13:10:11'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from 'AA2017-07-23 13:10:11': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from 'AA2017-07-23 13:10:11': error parsing date SELECT timestamp '2017-07-23A13:10:11'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '2017-07-23A13:10:11': invalid timestamp separator +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '2017-07-23A13:10:11': invalid timestamp separator SELECT t FROM timestamp ORDER BY t; @@ -106,14 +106,14 @@ SELECT t-t FROM timestamp; +---------------------------+ | timestamp.t - timestamp.t | +---------------------------+ -| PT0S | +| P0D | | | -| PT0S | -| PT0S | -| PT0S | -| PT0S | -| PT0S | -| PT0S | +| P0D | +| P0D | +| P0D | +| P0D | +| P0D | +| P0D | +---------------------------+ SELECT EXTRACT(YEAR from TIMESTAMP '1992-01-01 01:01:01'); @@ -166,11 +166,11 @@ SELECT t::TIME FROM timestamp WHERE EXTRACT(YEAR from t)=2007 ORDER BY 1; SELECT (DATE '1992-01-01')::TIMESTAMP; -+---------------------+ -| Utf8("1992-01-01") | -+---------------------+ -| 1992-01-01T00:00:00 | -+---------------------+ ++---------------------------------------------------------------------+ +| arrow_cast(Utf8("1992-01-01"),Utf8("Timestamp(Millisecond, None)")) | 
++---------------------------------------------------------------------+ +| 1992-01-01T00:00:00 | ++---------------------------------------------------------------------+ SELECT TIMESTAMP '2008-01-01 00:00:01.5'::VARCHAR; @@ -182,11 +182,11 @@ SELECT TIMESTAMP '2008-01-01 00:00:01.5'::VARCHAR; SELECT TIMESTAMP '-8-01-01 00:00:01.5'::VARCHAR; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '-8-01-01 00:00:01.5': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '-8-01-01 00:00:01.5': error parsing date SELECT TIMESTAMP '100000-01-01 00:00:01.5'::VARCHAR; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '100000-01-01 00:00:01.5': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '100000-01-01 00:00:01.5': error parsing date DROP TABLE timestamp; diff --git a/tests/cases/standalone/common/types/timestamp/timestamp_limits.result b/tests/cases/standalone/common/types/timestamp/timestamp_limits.result index 4680d298f0..a3c8c619b4 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp_limits.result +++ b/tests/cases/standalone/common/types/timestamp/timestamp_limits.result @@ -9,73 +9,73 @@ select timestamp '1970-01-01'; select '290309-12-22 (BC) 00:00:00'::timestamp; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select '290309-12-21 (BC) 12:59:59.999999'::timestamp; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-21 (BC) 12:59:59.999999': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-21 (BC) 12:59:59.999999': error parsing date select '290309-12-22 (BC) 00:00:00'::timestamp + interval '1 day'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select '290309-12-22 (BC) 00:00:00'::timestamp - interval '1 microsecond'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select '290309-12-22 (BC) 00:00:00'::timestamp - interval '1 second'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select '290309-12-22 (BC) 00:00:00'::timestamp - interval '1 day'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select '290309-12-22 (BC) 00:00:00'::timestamp - interval '1 month'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing 
date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select '290309-12-22 (BC) 00:00:00'::timestamp - interval '1 year'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00': error parsing date select timestamp '294247-01-10 04:00:54.775806'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date select timestamp '294247-01-10 04:00:54.775807'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775807': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775807': error parsing date select timestamp '294247-01-10 04:00:54.775806' + interval '1 microsecond'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date select timestamp '294247-01-10 04:00:54.775806' + interval '1 second'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date select timestamp '294247-01-10 04:00:54.775806' + interval '1 hour'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date select timestamp '294247-01-10 04:00:54.775806' + interval '1 day'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date select timestamp '294247-01-10 04:00:54.775806' + interval '1 month'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date select timestamp '294247-01-10 04:00:54.775806' + interval '1 year'; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54.775806': error parsing date SELECT '290309-12-22 (BC) 00:00:00+07:00'::TIMESTAMP; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00+07:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '290309-12-22 (BC) 00:00:00+07:00': error parsing date SELECT '294247-01-10 
04:00:54-07:00'::TIMESTAMP; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '294247-01-10 04:00:54-07:00': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '294247-01-10 04:00:54-07:00': error parsing date diff --git a/tests/cases/standalone/common/types/timestamp/timestamp_precision.result b/tests/cases/standalone/common/types/timestamp/timestamp_precision.result index 03126f2806..0587aa72a6 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp_precision.result +++ b/tests/cases/standalone/common/types/timestamp/timestamp_precision.result @@ -29,59 +29,59 @@ Error: 3001(EngineExecuteQuery), Execution error: Date part 'MICROSECONDS' not s -- any other precision is rounded up (e.g. 1/2 -> 3, 4/5 -> 6, 7/8 -> 9) SELECT TIMESTAMP '2020-01-01 01:23:45.123456789'::TIMESTAMP(0); -+---------------------------------------+ -| Utf8("2020-01-01 01:23:45.123456789") | -+---------------------------------------+ -| 2020-01-01T01:23:45 | -+---------------------------------------+ ++-----------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.123456789"),Utf8("Timestamp(Second, None)")) | ++-----------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45 | ++-----------------------------------------------------------------------------------+ SELECT TIMESTAMP '2020-01-01 01:23:45.123456789'::TIMESTAMP(3); -+---------------------------------------+ -| Utf8("2020-01-01 01:23:45.123456789") | -+---------------------------------------+ -| 2020-01-01T01:23:45.123 | -+---------------------------------------+ ++----------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.123456789"),Utf8("Timestamp(Millisecond, None)")) | ++----------------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45.123 | ++----------------------------------------------------------------------------------------+ SELECT TIMESTAMP '2020-01-01 01:23:45.123456789'::TIMESTAMP(6); -+---------------------------------------+ -| Utf8("2020-01-01 01:23:45.123456789") | -+---------------------------------------+ -| 2020-01-01T01:23:45.123456 | -+---------------------------------------+ ++----------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.123456789"),Utf8("Timestamp(Microsecond, None)")) | ++----------------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45.123456 | ++----------------------------------------------------------------------------------------+ SELECT TIMESTAMP '2020-01-01 01:23:45.123456789'::TIMESTAMP(9); -+---------------------------------------+ -| Utf8("2020-01-01 01:23:45.123456789") | -+---------------------------------------+ -| 2020-01-01T01:23:45.123456789 | -+---------------------------------------+ ++---------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.123456789"),Utf8("Timestamp(Nanosecond, None)")) | ++---------------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45.123456789 | ++---------------------------------------------------------------------------------------+ SELECT TIMESTAMP '2020-01-01 01:23:45.12'::TIMESTAMP(3); 
-+--------------------------------+ -| Utf8("2020-01-01 01:23:45.12") | -+--------------------------------+ -| 2020-01-01T01:23:45.120 | -+--------------------------------+ ++---------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.12"),Utf8("Timestamp(Millisecond, None)")) | ++---------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45.120 | ++---------------------------------------------------------------------------------+ SELECT TIMESTAMP '2020-01-01 01:23:45.12345'::TIMESTAMP(6); -+-----------------------------------+ -| Utf8("2020-01-01 01:23:45.12345") | -+-----------------------------------+ -| 2020-01-01T01:23:45.123450 | -+-----------------------------------+ ++------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.12345"),Utf8("Timestamp(Microsecond, None)")) | ++------------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45.123450 | ++------------------------------------------------------------------------------------+ SELECT TIMESTAMP '2020-01-01 01:23:45.12345678'::TIMESTAMP(9); -+--------------------------------------+ -| Utf8("2020-01-01 01:23:45.12345678") | -+--------------------------------------+ -| 2020-01-01T01:23:45.123456780 | -+--------------------------------------+ ++--------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2020-01-01 01:23:45.12345678"),Utf8("Timestamp(Nanosecond, None)")) | ++--------------------------------------------------------------------------------------+ +| 2020-01-01T01:23:45.123456780 | ++--------------------------------------------------------------------------------------+ DROP TABLE ts_precision; diff --git a/tests/cases/standalone/common/types/timestamp/timestamp_types.result b/tests/cases/standalone/common/types/timestamp/timestamp_types.result index e78b11dec3..e134d932c6 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp_types.result +++ b/tests/cases/standalone/common/types/timestamp/timestamp_types.result @@ -25,11 +25,11 @@ SELECT extract(YEAR from sec),extract( YEAR from milli),extract(YEAR from nano) SELECT nano::TIMESTAMP, milli::TIMESTAMP,sec::TIMESTAMP from timestamp; -+-------------------------+-------------------------+---------------------+ -| timestamp.nano | milli | timestamp.sec | -+-------------------------+-------------------------+---------------------+ -| 2008-01-01T00:00:01.889 | 2008-01-01T00:00:01.594 | 2008-01-01T00:00:01 | -+-------------------------+-------------------------+---------------------+ ++-----------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------------+ +| arrow_cast(timestamp.nano,Utf8("Timestamp(Millisecond, None)")) | arrow_cast(timestamp.milli,Utf8("Timestamp(Millisecond, None)")) | arrow_cast(timestamp.sec,Utf8("Timestamp(Millisecond, None)")) | ++-----------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------------+ +| 2008-01-01T00:00:01.889 | 2008-01-01T00:00:01.594 | 2008-01-01T00:00:01 | 
++-----------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------------+ SELECT micro::TIMESTAMP_S as m1, micro::TIMESTAMP_MS as m2,micro::TIMESTAMP_NS as m3 from timestamp; @@ -49,7 +49,7 @@ Affected Rows: 1 select '90000-01-19 03:14:07.999999'::TIMESTAMP_US::TIMESTAMP_NS; -Error: 3001(EngineExecuteQuery), DataFusion error: Parser error: Error parsing timestamp from '90000-01-19 03:14:07.999999': error parsing date +Error: 3001(EngineExecuteQuery), Parser error: Error parsing timestamp from '90000-01-19 03:14:07.999999': error parsing date select sec::DATE from timestamp; @@ -113,63 +113,63 @@ select nano::TIME from timestamp; select sec::TIMESTAMP_MS from timestamp; -+---------------------+ -| timestamp.sec | -+---------------------+ -| 2008-01-01T00:00:01 | -| 2008-01-01T00:00:51 | -| 2008-01-01T00:00:11 | -+---------------------+ ++----------------------------------------------------------------+ +| arrow_cast(timestamp.sec,Utf8("Timestamp(Millisecond, None)")) | ++----------------------------------------------------------------+ +| 2008-01-01T00:00:01 | +| 2008-01-01T00:00:51 | +| 2008-01-01T00:00:11 | ++----------------------------------------------------------------+ select sec::TIMESTAMP_NS from timestamp; -+---------------------+ -| timestamp.sec | -+---------------------+ -| 2008-01-01T00:00:01 | -| 2008-01-01T00:00:51 | -| 2008-01-01T00:00:11 | -+---------------------+ ++---------------------------------------------------------------+ +| arrow_cast(timestamp.sec,Utf8("Timestamp(Nanosecond, None)")) | ++---------------------------------------------------------------+ +| 2008-01-01T00:00:01 | +| 2008-01-01T00:00:51 | +| 2008-01-01T00:00:11 | ++---------------------------------------------------------------+ select milli::TIMESTAMP_SEC from timestamp; -+---------------------+ -| timestamp.milli | -+---------------------+ -| 2008-01-01T00:00:01 | -| 2008-01-01T00:00:01 | -| 2008-01-01T00:00:01 | -+---------------------+ ++-------------------------------------------------------------+ +| arrow_cast(timestamp.milli,Utf8("Timestamp(Second, None)")) | ++-------------------------------------------------------------+ +| 2008-01-01T00:00:01 | +| 2008-01-01T00:00:01 | +| 2008-01-01T00:00:01 | ++-------------------------------------------------------------+ select milli::TIMESTAMP_NS from timestamp; -+-------------------------+ -| timestamp.milli | -+-------------------------+ -| 2008-01-01T00:00:01.594 | -| 2008-01-01T00:00:01.894 | -| 2008-01-01T00:00:01.794 | -+-------------------------+ ++-----------------------------------------------------------------+ +| arrow_cast(timestamp.milli,Utf8("Timestamp(Nanosecond, None)")) | ++-----------------------------------------------------------------+ +| 2008-01-01T00:00:01.594 | +| 2008-01-01T00:00:01.894 | +| 2008-01-01T00:00:01.794 | ++-----------------------------------------------------------------+ select nano::TIMESTAMP_SEC from timestamp; -+---------------------+ -| timestamp.nano | -+---------------------+ -| 2008-01-01T00:00:01 | -| 2008-01-01T00:00:01 | -| 2008-01-01T00:00:01 | -+---------------------+ ++------------------------------------------------------------+ +| arrow_cast(timestamp.nano,Utf8("Timestamp(Second, None)")) | ++------------------------------------------------------------+ +| 2008-01-01T00:00:01 | +| 2008-01-01T00:00:01 | +| 2008-01-01T00:00:01 | 
++------------------------------------------------------------+ select nano::TIMESTAMP_MS from timestamp; -+-------------------------+ -| timestamp.nano | -+-------------------------+ -| 2008-01-01T00:00:01.889 | -| 2008-01-01T00:00:01.999 | -| 2008-01-01T00:00:01.899 | -+-------------------------+ ++-----------------------------------------------------------------+ +| arrow_cast(timestamp.nano,Utf8("Timestamp(Millisecond, None)")) | ++-----------------------------------------------------------------+ +| 2008-01-01T00:00:01.889 | +| 2008-01-01T00:00:01.999 | +| 2008-01-01T00:00:01.899 | ++-----------------------------------------------------------------+ select sec from timestamp order by sec; @@ -276,99 +276,99 @@ select timestamp.nano from timestamp inner join timestamp_two on (timestamp.nan select '2008-01-01 00:00:11'::TIMESTAMP_US = '2008-01-01 00:00:11'::TIMESTAMP_MS; -+-----------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11") = Utf8("2008-01-01 00:00:11") | -+-----------------------------------------------------------+ -| true | -+-----------------------------------------------------------+ ++-------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Microsecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Millisecond, None)")) | ++-------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++-------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11'::TIMESTAMP_US = '2008-01-01 00:00:11'::TIMESTAMP_NS; -+-----------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11") = Utf8("2008-01-01 00:00:11") | -+-----------------------------------------------------------+ -| true | -+-----------------------------------------------------------+ ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Microsecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Nanosecond, None)")) | ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11'::TIMESTAMP_US = '2008-01-01 00:00:11'::TIMESTAMP_S; -+-----------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11") = Utf8("2008-01-01 00:00:11") | -+-----------------------------------------------------------+ -| true | -+-----------------------------------------------------------+ ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Microsecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Second, None)")) | 
++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11.1'::TIMESTAMP_US = '2008-01-01 00:00:11'::TIMESTAMP_MS; -+-------------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11.1") = Utf8("2008-01-01 00:00:11") | -+-------------------------------------------------------------+ -| false | -+-------------------------------------------------------------+ ++---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Microsecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Millisecond, None)")) | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| false | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11.1'::TIMESTAMP_US = '2008-01-01 00:00:11'::TIMESTAMP_NS; -+-------------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11.1") = Utf8("2008-01-01 00:00:11") | -+-------------------------------------------------------------+ -| false | -+-------------------------------------------------------------+ ++--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Microsecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Nanosecond, None)")) | ++--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| false | ++--------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11.1'::TIMESTAMP_US = '2008-01-01 00:00:11.1'::TIMESTAMP_S; -+---------------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11.1") = Utf8("2008-01-01 00:00:11.1") | -+---------------------------------------------------------------+ -| true | -+---------------------------------------------------------------+ ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Microsecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Second, None)")) | ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11.1'::TIMESTAMP_MS = '2008-01-01 00:00:11'::TIMESTAMP_NS; -+-------------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11.1") = 
Utf8("2008-01-01 00:00:11") | -+-------------------------------------------------------------+ -| false | -+-------------------------------------------------------------+ ++--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Millisecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Nanosecond, None)")) | ++--------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| false | ++--------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11.1'::TIMESTAMP_MS = '2008-01-01 00:00:11'::TIMESTAMP_S; -+-------------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11.1") = Utf8("2008-01-01 00:00:11") | -+-------------------------------------------------------------+ -| true | -+-------------------------------------------------------------+ ++----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Millisecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Second, None)")) | ++----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++----------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11.1'::TIMESTAMP_NS = '2008-01-01 00:00:11'::TIMESTAMP_S; -+-------------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11.1") = Utf8("2008-01-01 00:00:11") | -+-------------------------------------------------------------+ -| true | -+-------------------------------------------------------------+ ++---------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11.1"),Utf8("Timestamp(Nanosecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Second, None)")) | ++---------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++---------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11'::TIMESTAMP_MS = '2008-01-01 00:00:11'::TIMESTAMP_NS; -+-----------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11") = Utf8("2008-01-01 00:00:11") | -+-----------------------------------------------------------+ -| true | -+-----------------------------------------------------------+ ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Millisecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Nanosecond, None)")) | 
++------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++------------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11'::TIMESTAMP_MS = '2008-01-01 00:00:11'::TIMESTAMP_S; -+-----------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11") = Utf8("2008-01-01 00:00:11") | -+-----------------------------------------------------------+ -| true | -+-----------------------------------------------------------+ ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Millisecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Second, None)")) | ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ select '2008-01-01 00:00:11'::TIMESTAMP_NS = '2008-01-01 00:00:11'::TIMESTAMP_S; -+-----------------------------------------------------------+ -| Utf8("2008-01-01 00:00:11") = Utf8("2008-01-01 00:00:11") | -+-----------------------------------------------------------+ -| true | -+-----------------------------------------------------------+ ++-------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Nanosecond, None)")) = arrow_cast(Utf8("2008-01-01 00:00:11"),Utf8("Timestamp(Second, None)")) | ++-------------------------------------------------------------------------------------------------------------------------------------------------------+ +| true | ++-------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE timestamp; diff --git a/tests/cases/standalone/common/types/timestamp/timestamp_tz.result b/tests/cases/standalone/common/types/timestamp/timestamp_tz.result index 8084c46457..0d00d39e63 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp_tz.result +++ b/tests/cases/standalone/common/types/timestamp/timestamp_tz.result @@ -8,25 +8,25 @@ select timestamptz '2021-11-15 02:30:00'; select '2021-11-15 02:30:00'::TIMESTAMP::TIMESTAMPTZ; -+-----------------------------+ -| Utf8("2021-11-15 02:30:00") | -+-----------------------------+ -| 2021-11-15T02:30:00 | -+-----------------------------+ ++-------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(arrow_cast(Utf8("2021-11-15 02:30:00"),Utf8("Timestamp(Millisecond, None)")),Utf8("Timestamp(Millisecond, None)")) | ++-------------------------------------------------------------------------------------------------------------------------------+ +| 2021-11-15T02:30:00 | ++-------------------------------------------------------------------------------------------------------------------------------+ SELECT '2021-04-29 10:50:09-05'::TIMESTAMPTZ::DATE; -+--------------------------------+ -| 
Utf8("2021-04-29 10:50:09-05") | -+--------------------------------+ -| 2021-04-29 | -+--------------------------------+ ++---------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2021-04-29 10:50:09-05"),Utf8("Timestamp(Millisecond, None)")) | ++---------------------------------------------------------------------------------+ +| 2021-04-29 | ++---------------------------------------------------------------------------------+ SELECT '2021-04-29 10:50:09-05'::TIMESTAMPTZ::TIME; -+--------------------------------+ -| Utf8("2021-04-29 10:50:09-05") | -+--------------------------------+ -| 15:50:09 | -+--------------------------------+ ++---------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2021-04-29 10:50:09-05"),Utf8("Timestamp(Millisecond, None)")) | ++---------------------------------------------------------------------------------+ +| 15:50:09 | ++---------------------------------------------------------------------------------+ diff --git a/tests/cases/standalone/limit/limit.result b/tests/cases/standalone/limit/limit.result index 0d39bee3e1..ef2bf90a20 100644 --- a/tests/cases/standalone/limit/limit.result +++ b/tests/cases/standalone/limit/limit.result @@ -9,7 +9,7 @@ EXPLAIN SELECT * FROM (SELECT SUM(number) FROM numbers LIMIT 100000000000) LIMIT | plan_type | plan | +---------------+----------------------------------+ | logical_plan | MergeScan [is_placeholder=false] | -| physical_plan | EmptyExec: produce_one_row=false | +| physical_plan | EmptyExec | | | | +---------------+----------------------------------+ @@ -19,7 +19,7 @@ EXPLAIN SELECT * FROM (SELECT SUM(number) FROM numbers LIMIT 100000000000) WHERE | plan_type | plan | +---------------+----------------------------------+ | logical_plan | MergeScan [is_placeholder=false] | -| physical_plan | EmptyExec: produce_one_row=false | +| physical_plan | EmptyExec | | | | +---------------+----------------------------------+ diff --git a/tests/cases/standalone/optimizer/filter_push_down.result b/tests/cases/standalone/optimizer/filter_push_down.result index 7e7cb8ca30..06ee859d4a 100644 --- a/tests/cases/standalone/optimizer/filter_push_down.result +++ b/tests/cases/standalone/optimizer/filter_push_down.result @@ -178,16 +178,16 @@ SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHER | 3 | +---+ --- TODO(LFC): Somehow the following SQL does not order by column 1 under new DataFusion occasionally. Should further investigate it. Comment it out temporarily. 
--- expected: --- +---+---+--------------+ --- | a | b | ROW_NUMBER() | --- +---+---+--------------+ --- | 1 | 1 | 1 | --- | 2 | 2 | 5 | --- | 3 | 3 | 9 | --- +---+---+--------------+ --- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; +SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; + ++---+---+--------------------------------------------------------------------------------------------------------------------+ +| a | b | ROW_NUMBER() ORDER BY [i1.i ASC NULLS LAST, i2.i ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW | ++---+---+--------------------------------------------------------------------------------------------------------------------+ +| 1 | 1 | 1 | +| 2 | 2 | 5 | +| 3 | 3 | 9 | ++---+---+--------------------------------------------------------------------------------------------------------------------+ + -- TODO(ruihang): Invalid argument error: must either specify a row count or at least one column -- SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1; SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1; diff --git a/tests/cases/standalone/optimizer/filter_push_down.sql b/tests/cases/standalone/optimizer/filter_push_down.sql index b8ff96d328..b2cd11582d 100644 --- a/tests/cases/standalone/optimizer/filter_push_down.sql +++ b/tests/cases/standalone/optimizer/filter_push_down.sql @@ -47,16 +47,7 @@ SELECT * FROM (SELECT DISTINCT i1.i AS a, i2.i AS b FROM integers i1, integers i SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHERE i=3; --- TODO(LFC): Somehow the following SQL does not order by column 1 under new DataFusion occasionally. Should further investigate it. Comment it out temporarily. --- expected: --- +---+---+--------------+ --- | a | b | ROW_NUMBER() | --- +---+---+--------------+ --- | 1 | 1 | 1 | --- | 2 | 2 | 5 | --- | 3 | 3 | 9 | --- +---+---+--------------+ --- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; +SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1; -- TODO(ruihang): Invalid argument error: must either specify a row count or at least one column -- SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1;