Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-26 16:10:02 +00:00
Compare commits
25 Commits
| SHA1 |
|---|
| 4bb9ceb63b |
| 38456638f8 |
| 97c0b1f5c1 |
| 4fc7f12360 |
| ed17997449 |
| 849ae8ebb6 |
| a0587e2e87 |
| 1ed71169ac |
| e62f0e2b64 |
| f92e753a34 |
| a22b016f90 |
| 7a9fa99069 |
| d808e7be7e |
| 8e22fcfd5c |
| 26729c31a6 |
| b73617eaba |
| 3b909f63e3 |
| 0d4e07eddd |
| b94ce9019d |
| 3dcd40c4ba |
| a67803d0e9 |
| aa7e7942f8 |
| f1b7581dc3 |
| cd761df369 |
| 0cea6ae64d |
.github/scripts/deploy-greptimedb.sh (vendored, 40 changed lines)
@@ -3,12 +3,14 @@
 set -e
 set -o pipefail

-KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.24.0}"
+KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.32.0}"
 ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
 DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
 GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
-ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
 GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
+ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
+ETCD_CHART_VERSION="${ETCD_CHART_VERSION:-12.0.8}"
+ETCD_IMAGE_TAG="${ETCD_IMAGE_TAG:-3.6.1-debian-12-r3}"

 # Create a cluster with 1 control-plane node and 5 workers.
 function create_kind_cluster() {
@@ -35,10 +37,16 @@ function add_greptime_chart() {
 function deploy_etcd_cluster() {
   local namespace="$1"

-  helm install etcd "$ETCD_CHART" \
+  helm upgrade --install etcd "$ETCD_CHART" \
+    --version "$ETCD_CHART_VERSION" \
+    --create-namespace \
     --set replicaCount=3 \
     --set auth.rbac.create=false \
     --set auth.rbac.token.enabled=false \
+    --set global.security.allowInsecureImages=true \
+    --set image.registry=docker.io \
+    --set image.repository=greptime/etcd \
+    --set image.tag="$ETCD_IMAGE_TAG" \
     -n "$namespace"

   # Wait for etcd cluster to be ready.
@@ -48,7 +56,8 @@ function deploy_etcd_cluster() {
 # Deploy greptimedb-operator.
 function deploy_greptimedb_operator() {
   # Use the latest chart and image.
-  helm install greptimedb-operator greptime/greptimedb-operator \
+  helm upgrade --install greptimedb-operator greptime/greptimedb-operator \
+    --create-namespace \
     --set image.tag=latest \
     -n "$DEFAULT_INSTALL_NAMESPACE"

@@ -66,9 +75,11 @@ function deploy_greptimedb_cluster() {

   deploy_etcd_cluster "$install_namespace"

-  helm install "$cluster_name" greptime/greptimedb-cluster \
+  helm upgrade --install "$cluster_name" greptime/greptimedb-cluster \
+    --create-namespace \
     --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
     --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
+    --set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
     -n "$install_namespace"

   # Wait for greptimedb cluster to be ready.
@@ -101,15 +112,17 @@ function deploy_greptimedb_cluster_with_s3_storage() {

   deploy_etcd_cluster "$install_namespace"

-  helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
+  helm upgrade --install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
+    --create-namespace \
     --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
     --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
-    --set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
-    --set storage.s3.region="$AWS_REGION" \
-    --set storage.s3.root="$DATA_ROOT" \
-    --set storage.credentials.secretName=s3-credentials \
-    --set storage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
-    --set storage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"
+    --set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
+    --set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
+    --set objectStorage.s3.region="$AWS_REGION" \
+    --set objectStorage.s3.root="$DATA_ROOT" \
+    --set objectStorage.credentials.secretName=s3-credentials \
+    --set objectStorage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
+    --set objectStorage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"

   # Wait for greptimedb cluster to be ready.
   while true; do
@@ -134,7 +147,8 @@ function deploy_greptimedb_cluster_with_s3_storage() {
 # Deploy standalone greptimedb.
 # It will expose cluster service ports as '34000', '34001', '34002', '34003' to local access.
 function deploy_standalone_greptimedb() {
-  helm install greptimedb-standalone greptime/greptimedb-standalone \
+  helm upgrade --install greptimedb-standalone greptime/greptimedb-standalone \
+    --create-namespace \
     --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
     -n "$DEFAULT_INSTALL_NAMESPACE"

.github/workflows/semantic-pull-request.yml (vendored, 6 changed lines)
@@ -1,7 +1,7 @@
 name: "Semantic Pull Request"

 on:
-  pull_request:
+  pull_request_target:
     types:
       - opened
       - reopened
@@ -12,9 +12,9 @@ concurrency:
   cancel-in-progress: true

 permissions:
-  issues: write
-  contents: write
+  contents: read
   pull-requests: write
+  issues: write

 jobs:
   check:
Cargo.lock (generated, 161 changed lines)

All changes in this lockfile fall into a few mechanical groups:

- Every workspace crate entry is bumped from `version = "0.17.0"` to `version = "0.17.2"`: api, auth, cache, catalog, cli, client, cmd, common-base, common-catalog, common-config, common-datasource, common-decimal, common-error, common-event-recorder, common-frontend, common-function, common-greptimedb-telemetry, common-grpc, common-grpc-expr, common-macro, common-mem-prof, common-meta, common-options, common-plugins, common-pprof, common-procedure, common-procedure-test, common-query, common-recordbatch, common-runtime, common-session, common-sql, common-telemetry, common-test-util, common-time, common-version, common-wal, common-workload, datanode, datatypes, file-engine, flow, frontend, index, log-query, log-store, meta-client, meta-srv, metric-engine, mito-codec, mito2, object-store, operator, partition, pipeline, plugins, promql, puffin, query, servers, session, sql, sqlness-runner, stat, store-api, substrait, table, tests-fuzz, and tests-integration.
- Dependency entries on the workspace substrait crate change accordingly from `"substrait 0.17.0"` to `"substrait 0.17.2"` in cli, client, cmd, datanode, flow, frontend, operator, query, and tests-integration.
- The workspace substrait crate's own dependency list grows by one entry (hunk `@@ -12468,11 +12466,12 @@`); the surviving fragment lists async-trait, bytes, common-error, common-function, common-macro, common-telemetry, and datafusion.
- `memcomparable` 0.2.0 switches its source from the crates.io registry (checksum `376101dbd964fc502d5902216e180f92b3d003b5cc3d2e40e044eb5470fca677`) to `git+https://github.com/v0y4g3r/memcomparable.git?rev=a07122dc03556bbd88ad66234cbea7efd3b23efb`.
- The mito2 dependency list shrinks by one entry (hunk `@@ -7603,7 +7602,6 @@`); the surviving fragment shows memcomparable and mito-codec among its dependencies.
Cargo.toml
@@ -73,7 +73,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.17.0"
+version = "0.17.2"
 edition = "2021"
 license = "Apache-2.0"

@@ -26,12 +26,11 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
 use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::timestamp::TimestampMicrosecond;
+use datatypes::timestamp::TimestampSecond;
 use datatypes::value::Value;
 use datatypes::vectors::{
     ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
-    StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
-    UInt64VectorBuilder,
+    StringVectorBuilder, TimestampSecondVector, TimestampSecondVectorBuilder, UInt64VectorBuilder,
 };
 use futures::{StreamExt, TryStreamExt};
 use partition::manager::PartitionInfo;
@@ -129,17 +128,17 @@ impl InformationSchemaPartitions {
             ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
             ColumnSchema::new(
                 "create_time",
-                ConcreteDataType::timestamp_microsecond_datatype(),
+                ConcreteDataType::timestamp_second_datatype(),
                 true,
             ),
             ColumnSchema::new(
                 "update_time",
-                ConcreteDataType::timestamp_microsecond_datatype(),
+                ConcreteDataType::timestamp_second_datatype(),
                 true,
             ),
             ColumnSchema::new(
                 "check_time",
-                ConcreteDataType::timestamp_microsecond_datatype(),
+                ConcreteDataType::timestamp_second_datatype(),
                 true,
             ),
             ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
@@ -212,7 +211,7 @@ struct InformationSchemaPartitionsBuilder {
     partition_names: StringVectorBuilder,
     partition_ordinal_positions: Int64VectorBuilder,
     partition_expressions: StringVectorBuilder,
-    create_times: TimestampMicrosecondVectorBuilder,
+    create_times: TimestampSecondVectorBuilder,
     partition_ids: UInt64VectorBuilder,
 }

@@ -232,7 +231,7 @@ impl InformationSchemaPartitionsBuilder {
             partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
             partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
             partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-            create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
+            create_times: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
             partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
         }
     }
@@ -331,8 +330,8 @@ impl InformationSchemaPartitionsBuilder {
             .push(Some((index + 1) as i64));
         let expression = partition.partition_expr.as_ref().map(|e| e.to_string());
         self.partition_expressions.push(expression.as_deref());
-        self.create_times.push(Some(TimestampMicrosecond::from(
-            table_info.meta.created_on.timestamp_millis(),
+        self.create_times.push(Some(TimestampSecond::from(
+            table_info.meta.created_on.timestamp(),
         )));
         self.partition_ids.push(Some(partition.id.as_u64()));
     }
@@ -349,8 +348,8 @@ impl InformationSchemaPartitionsBuilder {
             Arc::new(Int64Vector::from(vec![None])),
             rows_num,
         ));
-        let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
-            Arc::new(TimestampMicrosecondVector::from(vec![None])),
+        let null_timestamp_second_vector = Arc::new(ConstantVector::new(
+            Arc::new(TimestampSecondVector::from(vec![None])),
             rows_num,
         ));
         let partition_methods = Arc::new(ConstantVector::new(
@@ -380,8 +379,8 @@ impl InformationSchemaPartitionsBuilder {
             null_i64_vector.clone(),
             Arc::new(self.create_times.finish()),
             // TODO(dennis): supports update_time
-            null_timestampmicrosecond_vector.clone(),
-            null_timestampmicrosecond_vector,
+            null_timestamp_second_vector.clone(),
+            null_timestamp_second_vector,
             null_i64_vector,
             null_string_vector.clone(),
             null_string_vector.clone(),
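For context on the unit change above: the old code passed `created_on.timestamp_millis()` (milliseconds) into a microsecond-typed timestamp, a unit mismatch, while the new code keeps the value and the column type in the same unit by switching the columns to second precision and feeding them `created_on.timestamp()`. A minimal, chrono-only sketch of the three accessors; the `created_on` value here is a hypothetical stand-in for a table's creation time, not GreptimeDB's actual types:

use chrono::{DateTime, Utc};

fn main() {
    // Hypothetical stand-in for `table_info.meta.created_on`.
    let created_on: DateTime<Utc> = Utc::now();

    // Seconds since the Unix epoch: matches a second-precision timestamp column.
    let secs = created_on.timestamp();
    // Milliseconds and microseconds differ from `secs` by factors of 1_000 and 1_000_000.
    let millis = created_on.timestamp_millis();
    let micros = created_on.timestamp_micros();

    assert_eq!(millis / 1_000, secs);
    assert_eq!(micros / 1_000_000, secs);
    println!("secs={secs} millis={millis} micros={micros}");
}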
@@ -83,6 +83,20 @@ pub(crate) struct StoreConfig {
 }

 impl StoreConfig {
+    pub fn tls_config(&self) -> Option<TlsOption> {
+        if self.backend_tls_mode != TlsMode::Disable {
+            Some(TlsOption {
+                mode: self.backend_tls_mode.clone(),
+                cert_path: self.backend_tls_cert_path.clone(),
+                key_path: self.backend_tls_key_path.clone(),
+                ca_cert_path: self.backend_tls_ca_cert_path.clone(),
+                watch: self.backend_tls_watch,
+            })
+        } else {
+            None
+        }
+    }
+
     /// Builds a [`KvBackendRef`] from the store configuration.
     pub async fn build(&self) -> Result<KvBackendRef, BoxedError> {
         let max_txn_ops = self.max_txn_ops;
@@ -92,17 +106,7 @@ impl StoreConfig {
         } else {
             let kvbackend = match self.backend {
                 BackendImpl::EtcdStore => {
-                    let tls_config = if self.backend_tls_mode != TlsMode::Disable {
-                        Some(TlsOption {
-                            mode: self.backend_tls_mode.clone(),
-                            cert_path: self.backend_tls_cert_path.clone(),
-                            key_path: self.backend_tls_key_path.clone(),
-                            ca_cert_path: self.backend_tls_ca_cert_path.clone(),
-                            watch: self.backend_tls_watch,
-                        })
-                    } else {
-                        None
-                    };
+                    let tls_config = self.tls_config();
                     let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
                         .await
                         .map_err(BoxedError::new)?;
@@ -111,7 +115,8 @@ impl StoreConfig {
                 #[cfg(feature = "pg_kvbackend")]
                 BackendImpl::PostgresStore => {
                     let table_name = &self.meta_table_name;
-                    let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, None)
+                    let tls_config = self.tls_config();
+                    let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, tls_config)
                         .await
                         .map_err(BoxedError::new)?;
                     let schema_name = self.meta_schema_name.as_deref();
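The change above factors the duplicated TLS-option construction into a single `tls_config()` helper and reuses it for both the etcd and the Postgres backends, which is also what lets the Postgres pool pick up TLS settings instead of the previous hard-coded `None`. A minimal self-contained sketch of the pattern, with stand-in `TlsMode`/`TlsOption` types (the real ones live in GreptimeDB's configuration crates):

#[derive(Clone, PartialEq)]
enum TlsMode { Disable, Require }

#[derive(Clone)]
struct TlsOption { mode: TlsMode, cert_path: String, key_path: String }

struct StoreConfig {
    backend_tls_mode: TlsMode,
    backend_tls_cert_path: String,
    backend_tls_key_path: String,
}

impl StoreConfig {
    // Single source of truth for "is TLS enabled, and with which material".
    fn tls_config(&self) -> Option<TlsOption> {
        (self.backend_tls_mode != TlsMode::Disable).then(|| TlsOption {
            mode: self.backend_tls_mode.clone(),
            cert_path: self.backend_tls_cert_path.clone(),
            key_path: self.backend_tls_key_path.clone(),
        })
    }
}

fn main() {
    let cfg = StoreConfig {
        backend_tls_mode: TlsMode::Require,
        backend_tls_cert_path: "client.crt".into(),
        backend_tls_key_path: "client.key".into(),
    };
    // Both backend branches can now share the same helper.
    let for_etcd = cfg.tls_config();
    let for_postgres = cfg.tls_config();
    assert!(for_etcd.is_some() && for_postgres.is_some());
}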
@@ -196,7 +196,10 @@ pub async fn stream_to_parquet(
     concurrency: usize,
 ) -> Result<usize> {
     let write_props = column_wise_config(
-        WriterProperties::builder().set_compression(Compression::ZSTD(ZstdLevel::default())),
+        WriterProperties::builder()
+            .set_compression(Compression::ZSTD(ZstdLevel::default()))
+            .set_statistics_truncate_length(None)
+            .set_column_index_truncate_length(None),
         schema,
     )
     .build();
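A standalone sketch of the writer-properties change above, using the parquet crate directly: passing `None` to the two truncate-length setters disables truncation of per-column statistics and of column-index entries, so min/max values are kept in full. This assumes a parquet crate version recent enough to expose both setters, as the diff itself uses them:

use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::WriterProperties;

fn main() {
    let props = WriterProperties::builder()
        // ZSTD at the crate's default level, as in the function above.
        .set_compression(Compression::ZSTD(ZstdLevel::default()))
        // `None` disables truncation of per-column statistics (min/max kept in full).
        .set_statistics_truncate_length(None)
        // `None` likewise keeps full values in the column index structures.
        .set_column_index_truncate_length(None)
        .build();

    println!("row group size limit: {}", props.max_row_group_size());
}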
@@ -41,7 +41,12 @@ use datafusion_expr::{
 use datafusion_physical_expr::aggregate::AggregateFunctionExpr;
 use datatypes::arrow::datatypes::{DataType, Field};

-use crate::function_registry::FunctionRegistry;
+use crate::aggrs::aggr_wrapper::fix_order::FixStateUdafOrderingAnalyzer;
+use crate::function_registry::{FunctionRegistry, FUNCTION_REGISTRY};

+pub mod fix_order;
+#[cfg(test)]
+mod tests;
+
 /// Returns the name of the state function for the given aggregate function name.
 /// The state function is used to compute the state of the aggregate function.
@@ -57,6 +62,39 @@ pub fn aggr_merge_func_name(aggr_name: &str) -> String {
     format!("__{}_merge", aggr_name)
 }

+/// Check if the given aggregate expression is steppable.
+/// As in if it can be split into multiple steps:
+/// i.e. on datanode first call `state(input)` then
+/// on frontend call `calc(merge(state))` to get the final result.
+pub fn is_all_aggr_exprs_steppable(aggr_exprs: &[Expr]) -> bool {
+    aggr_exprs.iter().all(|expr| {
+        if let Some(aggr_func) = get_aggr_func(expr) {
+            if aggr_func.params.distinct {
+                // Distinct aggregate functions are not steppable(yet).
+                // TODO(discord9): support distinct aggregate functions.
+                return false;
+            }
+
+            // whether the corresponding state function exists in the registry
+            FUNCTION_REGISTRY.is_aggr_func_exist(&aggr_state_func_name(aggr_func.func.name()))
+        } else {
+            false
+        }
+    })
+}
+
+pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
+    let mut expr_ref = expr;
+    while let Expr::Alias(alias) = expr_ref {
+        expr_ref = &alias.expr;
+    }
+    if let Expr::AggregateFunction(aggr_func) = expr_ref {
+        Some(aggr_func)
+    } else {
+        None
+    }
+}
+
 /// A wrapper to make an aggregate function out of the state and merge functions of the original aggregate function.
 /// It contains the original aggregate function, the state functions, and the merge function.
 ///
@@ -74,18 +112,6 @@ pub struct StepAggrPlan {
     pub lower_state: LogicalPlan,
 }

-pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
-    let mut expr_ref = expr;
-    while let Expr::Alias(alias) = expr_ref {
-        expr_ref = &alias.expr;
-    }
-    if let Expr::AggregateFunction(aggr_func) = expr_ref {
-        Some(aggr_func)
-    } else {
-        None
-    }
-}
-
 impl StateMergeHelper {
     /// Register all the `state` function of supported aggregate functions.
     /// Note that can't register `merge` function here, as it needs to be created from the original aggregate function with given input types.
@@ -118,6 +144,7 @@ impl StateMergeHelper {
     }

     /// Split an aggregate plan into two aggregate plans, one for the state function and one for the merge function.
     ///
     pub fn split_aggr_node(aggr_plan: Aggregate) -> datafusion_common::Result<StepAggrPlan> {
         let aggr = {
             // certain aggr func need type coercion to work correctly, so we need to analyze the plan first.
@@ -137,6 +164,15 @@ impl StateMergeHelper {
         let mut lower_aggr_exprs = vec![];
         let mut upper_aggr_exprs = vec![];

+        // group exprs for upper plan should refer to the output group expr as column from lower plan
+        // to avoid re-compute group exprs again.
+        let upper_group_exprs = aggr
+            .group_expr
+            .iter()
+            .map(|c| c.qualified_name())
+            .map(|(r, c)| Expr::Column(Column::new(r, c)))
+            .collect();
+
         for aggr_expr in aggr.aggr_expr.iter() {
             let Some(aggr_func) = get_aggr_func(aggr_expr) else {
                 return Err(datafusion_common::DataFusionError::NotImplemented(format!(
@@ -164,6 +200,7 @@ impl StateMergeHelper {

             lower_aggr_exprs.push(expr);

+            // then create the merge function using the physical expression of the original aggregate function
             let (original_phy_expr, _filter, _ordering) = create_aggregate_expr_and_maybe_filter(
                 aggr_expr,
                 aggr.input.schema(),
@@ -179,9 +216,15 @@ impl StateMergeHelper {
             let arg = Expr::Column(Column::new_unqualified(lower_state_output_col_name));
             let expr = AggregateFunction {
                 func: Arc::new(merge_func.into()),
+                // notice filter/order_by is not supported in the merge function, as it's not meaningful to have them in the merge phase.
+                // do notice this order by is only removed in the outer logical plan, the physical plan still have order by and hence
+                // can create correct accumulator with order by.
                 params: AggregateFunctionParams {
                     args: vec![arg],
-                    ..aggr_func.params.clone()
+                    distinct: aggr_func.params.distinct,
+                    filter: None,
+                    order_by: vec![],
+                    null_treatment: aggr_func.params.null_treatment,
                 },
             };

@@ -198,10 +241,18 @@ impl StateMergeHelper {
         // update aggregate's output schema
         let lower_plan = lower_plan.recompute_schema()?;

-        let mut upper = aggr.clone();
+        // should only affect two udaf `first_value/last_value`
+        // which only them have meaningful order by field
+        let fixed_lower_plan =
+            FixStateUdafOrderingAnalyzer.analyze(lower_plan, &Default::default())?;
+
+        let upper = Aggregate::try_new(
+            Arc::new(fixed_lower_plan.clone()),
+            upper_group_exprs,
+            upper_aggr_exprs.clone(),
+        )?;
         let aggr_plan = LogicalPlan::Aggregate(aggr);
-        upper.aggr_expr = upper_aggr_exprs;
-        upper.input = Arc::new(lower_plan.clone());

         // upper schema's output schema should be the same as the original aggregate plan's output schema
         let upper_check = upper;
         let upper_plan = LogicalPlan::Aggregate(upper_check).recompute_schema()?;
@@ -213,7 +264,7 @@ impl StateMergeHelper {
         }

         Ok(StepAggrPlan {
-            lower_state: lower_plan,
+            lower_state: fixed_lower_plan,
             upper_merge: upper_plan,
         })
     }
@@ -224,13 +275,22 @@ impl StateMergeHelper {
 pub struct StateWrapper {
     inner: AggregateUDF,
     name: String,
+    /// Default to empty, might get fixed by analyzer later
+    ordering: Vec<FieldRef>,
+    /// Default to false, might get fixed by analyzer later
+    distinct: bool,
 }

 impl StateWrapper {
     /// `state_index`: The index of the state in the output of the state function.
     pub fn new(inner: AggregateUDF) -> datafusion_common::Result<Self> {
         let name = aggr_state_func_name(inner.name());
-        Ok(Self { inner, name })
+        Ok(Self {
+            inner,
+            name,
+            ordering: vec![],
+            distinct: false,
+        })
     }

     pub fn inner(&self) -> &AggregateUDF {
@@ -244,7 +304,19 @@ impl StateWrapper {
         &self,
         acc_args: &datafusion_expr::function::AccumulatorArgs,
     ) -> datafusion_common::Result<FieldRef> {
-        self.inner.return_field(acc_args.schema.fields())
+        let input_fields = acc_args
+            .exprs
+            .iter()
+            .map(|e| e.return_field(acc_args.schema))
+            .collect::<Result<Vec<_>, _>>()?;
+        self.inner.return_field(&input_fields).inspect_err(|e| {
+            common_telemetry::error!(
+                "StateWrapper: {:#?}\nacc_args:{:?}\nerror:{:?}",
+                &self,
+                &acc_args,
+                e
+            );
+        })
     }
 }

@@ -268,6 +340,7 @@ impl AggregateUDFImpl for StateWrapper {
             };
             self.inner.accumulator(acc_args)?
         };
+
         Ok(Box::new(StateAccum::new(inner, state_type)?))
     }

@@ -294,11 +367,22 @@ impl AggregateUDFImpl for StateWrapper {
             name: self.inner().name(),
             input_fields,
             return_field: self.inner.return_field(input_fields)?,
-            // TODO(discord9): how to get this?, probably ok?
-            ordering_fields: &[],
-            is_distinct: false,
+            // those args are also needed as they are vital to construct the state fields correctly.
+            ordering_fields: &self.ordering,
+            is_distinct: self.distinct,
         };
         let state_fields = self.inner.state_fields(state_fields_args)?;

+        let state_fields = state_fields
+            .into_iter()
+            .map(|f| {
+                let mut f = f.as_ref().clone();
+                // since state can be null when no input rows, so make all fields nullable
+                f.set_nullable(true);
+                Arc::new(f)
+            })
+            .collect::<Vec<_>>();
+
         let struct_field = DataType::Struct(state_fields.into());
         Ok(struct_field)
     }
@@ -363,6 +447,39 @@ impl Accumulator for StateAccum {
             .iter()
             .map(|s| s.to_array())
             .collect::<Result<Vec<_>, _>>()?;
+        let array_type = array
+            .iter()
+            .map(|a| a.data_type().clone())
+            .collect::<Vec<_>>();
+        let expected_type: Vec<_> = self
+            .state_fields
+            .iter()
+            .map(|f| f.data_type().clone())
+            .collect();
+        if array_type != expected_type {
+            debug!(
+                "State mismatch, expected: {}, got: {} for expected fields: {:?} and given array types: {:?}",
+                self.state_fields.len(),
+                array.len(),
+                self.state_fields,
+                array_type,
+            );
+            let guess_schema = array
+                .iter()
+                .enumerate()
+                .map(|(index, array)| {
+                    Field::new(
+                        format!("col_{index}[mismatch_state]").as_str(),
+                        array.data_type().clone(),
+                        true,
+                    )
+                })
+                .collect::<Fields>();
+            let arr = StructArray::try_new(guess_schema, array, None)?;
+
+            return Ok(ScalarValue::Struct(Arc::new(arr)));
+        }
+
         let struct_array = StructArray::try_new(self.state_fields.clone(), array, None)?;
         Ok(ScalarValue::Struct(Arc::new(struct_array)))
     }
@@ -401,7 +518,7 @@ pub struct MergeWrapper {
     merge_signature: Signature,
     /// The original physical expression of the aggregate function, can't store the original aggregate function directly, as PhysicalExpr didn't implement Any
     original_phy_expr: Arc<AggregateFunctionExpr>,
-    original_input_types: Vec<DataType>,
+    return_type: DataType,
 }
 impl MergeWrapper {
     pub fn new(
@@ -412,13 +529,14 @@ impl MergeWrapper {
         let name = aggr_merge_func_name(inner.name());
         // the input type is actually struct type, which is the state fields of the original aggregate function.
         let merge_signature = Signature::user_defined(datafusion_expr::Volatility::Immutable);
+        let return_type = inner.return_type(&original_input_types)?;

         Ok(Self {
             inner,
             name,
             merge_signature,
             original_phy_expr,
-            original_input_types,
+            return_type,
         })
     }

@@ -470,8 +588,7 @@ impl AggregateUDFImpl for MergeWrapper {
     /// so return fixed return type instead of using `arg_types` to determine the return type.
     fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
         // The return type is the same as the original aggregate function's return type.
-        let ret_type = self.inner.return_type(&self.original_input_types)?;
-        Ok(ret_type)
+        Ok(self.return_type.clone())
     }
     fn signature(&self) -> &Signature {
         &self.merge_signature
@@ -541,10 +658,11 @@ impl Accumulator for MergeAccum {
         })?;
         let fields = struct_arr.fields();
         if fields != &self.state_fields {
-            return Err(datafusion_common::DataFusionError::Internal(format!(
-                "Expected state fields: {:?}, got: {:?}",
+            debug!(
+                "State fields mismatch, expected: {:?}, got: {:?}",
                 self.state_fields, fields
-            )));
+            );
+            // state fields mismatch might be acceptable by datafusion, continue
         }

         // now fields should be the same, so we can merge the batch
@@ -561,6 +679,3 @@ impl Accumulator for MergeAccum {
         self.inner.state()
     }
 }
-
-#[cfg(test)]
-mod tests;
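To make the state/merge split above concrete, here is a small self-contained sketch (plain Rust, not the DataFusion API) of the two-phase evaluation the wrapper enables for `avg`: each datanode computes a partial state in the role of `__avg_state`, the frontend combines the partial states in the role of `__avg_merge`, and only then computes the final value.

// Partial state for avg: (sum, count). This mirrors the idea, not GreptimeDB's types.
#[derive(Clone, Copy, Default)]
struct AvgState {
    sum: f64,
    count: u64,
}

// "__avg_state": runs close to the data (datanode side) over one partition of rows.
fn avg_state(values: &[f64]) -> AvgState {
    AvgState {
        sum: values.iter().sum(),
        count: values.len() as u64,
    }
}

// "__avg_merge": runs on the frontend over the collected partial states.
fn avg_merge(states: &[AvgState]) -> AvgState {
    states.iter().fold(AvgState::default(), |acc, s| AvgState {
        sum: acc.sum + s.sum,
        count: acc.count + s.count,
    })
}

fn main() {
    // Two "datanodes", each holding part of the column.
    let partial = vec![avg_state(&[1.0, 2.0, 3.0]), avg_state(&[4.0, 5.0])];
    let merged = avg_merge(&partial);
    // The final calculation happens only after the merge step.
    let avg = merged.sum / merged.count as f64;
    assert_eq!(avg, 3.0);
}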
src/common/function/src/aggrs/aggr_wrapper/fix_order.rs (new file, 189 lines)
@@ -0,0 +1,189 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use common_telemetry::debug;
use datafusion::config::ConfigOptions;
use datafusion::optimizer::AnalyzerRule;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_expr::{AggregateUDF, Expr, ExprSchemable, LogicalPlan};

use crate::aggrs::aggr_wrapper::StateWrapper;

/// Traverse the plan, found all `__<aggr_name>_state` and fix their ordering fields
/// if their input aggr is with order by, this is currently only useful for `first_value` and `last_value` udaf
///
/// should be applied to datanode's query engine
/// TODO(discord9): proper way to extend substrait's serde ability to allow carry more info for custom udaf with more info
#[derive(Debug, Default)]
pub struct FixStateUdafOrderingAnalyzer;

impl AnalyzerRule for FixStateUdafOrderingAnalyzer {
    fn name(&self) -> &str {
        "FixStateUdafOrderingAnalyzer"
    }

    fn analyze(
        &self,
        plan: LogicalPlan,
        _config: &ConfigOptions,
    ) -> datafusion_common::Result<LogicalPlan> {
        plan.rewrite_with_subqueries(&mut FixOrderingRewriter::new(true))
            .map(|t| t.data)
    }
}

/// Traverse the plan, found all `__<aggr_name>_state` and remove their ordering fields
/// this is currently only useful for `first_value` and `last_value` udaf when need to encode to substrait
///
#[derive(Debug, Default)]
pub struct UnFixStateUdafOrderingAnalyzer;

impl AnalyzerRule for UnFixStateUdafOrderingAnalyzer {
    fn name(&self) -> &str {
        "UnFixStateUdafOrderingAnalyzer"
    }

    fn analyze(
        &self,
        plan: LogicalPlan,
        _config: &ConfigOptions,
    ) -> datafusion_common::Result<LogicalPlan> {
        plan.rewrite_with_subqueries(&mut FixOrderingRewriter::new(false))
            .map(|t| t.data)
    }
}

struct FixOrderingRewriter {
    /// once fixed, mark dirty, and always recompute schema from bottom up
    is_dirty: bool,
    /// if true, will add the ordering field from outer aggr expr
    /// if false, will remove the ordering field
    is_fix: bool,
}

impl FixOrderingRewriter {
    pub fn new(is_fix: bool) -> Self {
        Self {
            is_dirty: false,
            is_fix,
        }
    }
}

impl TreeNodeRewriter for FixOrderingRewriter {
    type Node = LogicalPlan;

    /// found all `__<aggr_name>_state` and fix their ordering fields
    /// if their input aggr is with order by
    fn f_up(
        &mut self,
        node: Self::Node,
    ) -> datafusion_common::Result<datafusion_common::tree_node::Transformed<Self::Node>> {
        let LogicalPlan::Aggregate(mut aggregate) = node else {
            return if self.is_dirty {
                let node = node.recompute_schema()?;
                Ok(Transformed::yes(node))
            } else {
                Ok(Transformed::no(node))
            };
        };

        // regex to match state udaf name
        for aggr_expr in &mut aggregate.aggr_expr {
            let new_aggr_expr = aggr_expr
                .clone()
                .transform_up(|expr| rewrite_expr(expr, &aggregate.input, self.is_fix))?;

            if new_aggr_expr.transformed {
                *aggr_expr = new_aggr_expr.data;
                self.is_dirty = true;
            }
        }

        if self.is_dirty {
            let node = LogicalPlan::Aggregate(aggregate).recompute_schema()?;
            debug!(
                "FixStateUdafOrderingAnalyzer: plan schema's field changed to {:?}",
                node.schema().fields()
            );

            Ok(Transformed::yes(node))
        } else {
            Ok(Transformed::no(LogicalPlan::Aggregate(aggregate)))
        }
    }
}

/// first see the aggr node in expr
/// as it could be nested aggr like alias(aggr(sort))
/// if contained aggr expr have a order by, and the aggr name match the regex
/// then we need to fix the ordering field of the state udaf
/// to be the same as the aggr expr
fn rewrite_expr(
    expr: Expr,
    aggregate_input: &Arc<LogicalPlan>,
    is_fix: bool,
) -> Result<Transformed<Expr>, datafusion_common::DataFusionError> {
    let Expr::AggregateFunction(aggregate_function) = expr else {
        return Ok(Transformed::no(expr));
    };

    let Some(old_state_wrapper) = aggregate_function
        .func
        .inner()
        .as_any()
        .downcast_ref::<StateWrapper>()
    else {
        return Ok(Transformed::no(Expr::AggregateFunction(aggregate_function)));
    };

    let mut state_wrapper = old_state_wrapper.clone();
    if is_fix {
        // then always fix the ordering field&distinct flag and more
        let order_by = aggregate_function.params.order_by.clone();
        let ordering_fields: Vec<_> = order_by
            .iter()
            .map(|sort_expr| {
                sort_expr
                    .expr
                    .to_field(&aggregate_input.schema())
                    .map(|(_, f)| f)
            })
            .collect::<datafusion_common::Result<Vec<_>>>()?;
        let distinct = aggregate_function.params.distinct;

        // fixing up
        state_wrapper.ordering = ordering_fields;
        state_wrapper.distinct = distinct;
    } else {
        // remove the ordering field & distinct flag
        state_wrapper.ordering = vec![];
        state_wrapper.distinct = false;
    }

    debug!(
        "FixStateUdafOrderingAnalyzer: fix state udaf from {old_state_wrapper:?} to {:?}",
        state_wrapper
    );

    let mut aggregate_function = aggregate_function;

    aggregate_function.func = Arc::new(AggregateUDF::new_from_impl(state_wrapper));

    Ok(Transformed::yes(Expr::AggregateFunction(
        aggregate_function,
    )))
}
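As a usage sketch, mirroring how the tests below invoke the rule: the fixing analyzer is applied directly to a logical plan before physical planning on the datanode side, while the `UnFix` variant would be applied before encoding the plan to substrait. The two helper functions here are hypothetical wrappers, assuming the two analyzers from this file are in scope:

use datafusion::config::ConfigOptions;
use datafusion::optimizer::AnalyzerRule;
use datafusion_expr::LogicalPlan;

// Hypothetical helper: re-attach ordering/distinct info to `__*_state` UDAFs
// in a plan that is about to be executed on a datanode.
fn fix_state_udafs_for_datanode(plan: LogicalPlan) -> datafusion_common::Result<LogicalPlan> {
    FixStateUdafOrderingAnalyzer.analyze(plan, &ConfigOptions::default())
}

// Hypothetical helper: strip the same info again before serializing the plan to substrait.
fn strip_state_udaf_ordering(plan: LogicalPlan) -> datafusion_common::Result<LogicalPlan> {
    UnFixStateUdafOrderingAnalyzer.analyze(plan, &ConfigOptions::default())
}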
@@ -17,13 +17,15 @@ use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use arrow::array::{ArrayRef, Float64Array, Int64Array, UInt64Array};
|
||||
use arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, UInt64Array};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_schema::SchemaRef;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use datafusion::catalog::{Session, TableProvider};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
|
||||
use datafusion::functions_aggregate::average::avg_udaf;
|
||||
use datafusion::functions_aggregate::count::count_udaf;
|
||||
use datafusion::functions_aggregate::sum::sum_udaf;
|
||||
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
@@ -537,6 +539,208 @@ async fn test_avg_udaf() {
|
||||
assert_eq!(merge_eval_res, ScalarValue::Float64(Some(132. / 45_f64)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_last_value_order_by_udaf() {
|
||||
init_default_ut_logging();
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
let last_value = datafusion::functions_aggregate::first_last::last_value_udaf();
|
||||
let last_value = (*last_value).clone();
|
||||
|
||||
let original_aggr = Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(last_value.clone()),
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
vec![datafusion_expr::expr::Sort::new(
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
true,
|
||||
true,
|
||||
)],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap();
|
||||
let res = StateMergeHelper::split_aggr_node(original_aggr).unwrap();
|
||||
|
||||
let state_func: Arc<AggregateUDF> =
|
||||
Arc::new(StateWrapper::new(last_value.clone()).unwrap().into());
|
||||
|
||||
let expected_aggr_state_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
state_func,
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
vec![datafusion_expr::expr::Sort::new(
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
true,
|
||||
true,
|
||||
)],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
// fix the ordering & distinct info of the state udaf, as they are not set in the wrapper.
|
||||
let fixed_aggr_state_plan = FixStateUdafOrderingAnalyzer {}
|
||||
.analyze(expected_aggr_state_plan.clone(), &Default::default())
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(&res.lower_state, &fixed_aggr_state_plan);
|
||||
|
||||
// schema is the state fields of the last_value udaf
|
||||
assert_eq!(
|
||||
res.lower_state.schema().as_arrow(),
|
||||
&arrow_schema::Schema::new(vec![Field::new(
|
||||
"__last_value_state(number) ORDER BY [number ASC NULLS FIRST]",
|
||||
DataType::Struct(
|
||||
vec![
|
||||
Field::new("last_value[last_value]", DataType::Int64, true),
|
||||
Field::new("number", DataType::Int64, true), // ordering field is added to state fields too
|
||||
Field::new("is_set", DataType::Boolean, true)
|
||||
]
|
||||
.into()
|
||||
),
|
||||
true,
|
||||
)])
|
||||
);
|
||||
|
||||
let expected_merge_fn = MergeWrapper::new(
|
||||
last_value.clone(),
|
||||
Arc::new(
|
||||
AggregateExprBuilder::new(
|
||||
Arc::new(last_value.clone()),
|
||||
vec![Arc::new(
|
||||
datafusion::physical_expr::expressions::Column::new("number", 0),
|
||||
)],
|
||||
)
|
||||
.schema(Arc::new(dummy_table_scan().schema().as_arrow().clone()))
|
||||
.alias("last_value(number) ORDER BY [number ASC NULLS FIRST]")
|
||||
.build()
|
||||
.unwrap(),
|
||||
),
|
||||
vec![DataType::Int64],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let expected_merge_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(fixed_aggr_state_plan.clone()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(expected_merge_fn.into()),
|
||||
vec![Expr::Column(Column::new_unqualified(
|
||||
"__last_value_state(number) ORDER BY [number ASC NULLS FIRST]",
|
||||
))],
|
||||
false,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))
|
||||
.alias("last_value(number) ORDER BY [number ASC NULLS FIRST]")],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(&res.upper_merge, &expected_merge_plan);
|
||||
|
||||
let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&fixed_aggr_state_plan, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_state_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
|
||||
let mut state_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
// evaluate the state function
|
||||
let input = Int64Array::from(vec![Some(1), Some(2), None, Some(3)]);
|
||||
let values = vec![Arc::new(input) as arrow::array::ArrayRef];
|
||||
|
||||
state_accum.update_batch(&values).unwrap();
|
||||
|
||||
let state = state_accum.state().unwrap();
|
||||
|
||||
// FIXME(discord9): once datafusion fixes the issue that last_value udaf state fields are not correctly(missing ordering field if `last` field is part of ordering field)
|
||||
// then change it back to 3 fields
|
||||
assert_eq!(state.len(), 2); // last value weird optimization(or maybe bug?) that it only has 2 state fields now
|
||||
assert_eq!(state[0], ScalarValue::Int64(Some(3)));
|
||||
assert_eq!(state[1], ScalarValue::Boolean(Some(true)));
|
||||
|
||||
let eval_res = state_accum.evaluate().unwrap();
|
||||
let expected = Arc::new(
|
||||
StructArray::try_new(
|
||||
vec![
|
||||
Field::new("col_0[mismatch_state]", DataType::Int64, true),
|
||||
Field::new("col_1[mismatch_state]", DataType::Boolean, true),
|
||||
// Field::new("last_value[last_value]", DataType::Int64, true),
|
||||
// Field::new("number", DataType::Int64, true),
|
||||
// Field::new("is_set", DataType::Boolean, true),
|
||||
]
|
||||
.into(),
|
||||
vec![
|
||||
Arc::new(Int64Array::from(vec![Some(3)])),
|
||||
// Arc::new(Int64Array::from(vec![Some(3)])),
|
||||
Arc::new(BooleanArray::from(vec![Some(true)])),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(eval_res, ScalarValue::Struct(expected));
|
||||
|
||||
let phy_aggr_merge_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&res.upper_merge, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_merge_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
|
||||
let mut merge_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
let merge_input = vec![
|
||||
Arc::new(Int64Array::from(vec![Some(3), Some(4)])) as arrow::array::ArrayRef,
|
||||
Arc::new(Int64Array::from(vec![Some(3), Some(4)])),
|
||||
Arc::new(BooleanArray::from(vec![Some(true), Some(true)])),
|
||||
];
|
||||
let merge_input_struct_arr = StructArray::try_new(
|
||||
vec![
|
||||
Field::new("last_value[last_value]", DataType::Int64, true),
|
||||
Field::new("number", DataType::Int64, true),
|
||||
Field::new("is_set", DataType::Boolean, true),
|
||||
]
|
||||
.into(),
|
||||
merge_input,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
merge_accum
|
||||
.update_batch(&[Arc::new(merge_input_struct_arr)])
|
||||
.unwrap();
|
||||
let merge_state = merge_accum.state().unwrap();
|
||||
assert_eq!(merge_state.len(), 3);
|
||||
assert_eq!(merge_state[0], ScalarValue::Int64(Some(4)));
|
||||
assert_eq!(merge_state[1], ScalarValue::Int64(Some(4)));
|
||||
assert_eq!(merge_state[2], ScalarValue::Boolean(Some(true)));
|
||||
|
||||
let merge_eval_res = merge_accum.evaluate().unwrap();
// the merge function returns the last value, which is 4
assert_eq!(merge_eval_res, ScalarValue::Int64(Some(4)));
}
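The state/merge split exercised above can be summarized without any DataFusion machinery. Below is a dependency-free sketch (the `LastValueState` type is hypothetical and ordering is ignored for brevity): each partition folds raw input into a partial state, and the upper-level merge folds the partial states into the final value.

```rust
// Dependency-free sketch of the state/merge split (ordering ignored for brevity).
#[derive(Clone, Copy, Default)]
struct LastValueState {
    last: Option<i64>,
    is_set: bool,
}

impl LastValueState {
    // "State" phase: fold raw input values, skipping nulls.
    fn update(&mut self, values: &[Option<i64>]) {
        for v in values.iter().flatten() {
            self.last = Some(*v);
            self.is_set = true;
        }
    }

    // "Merge" phase: fold another partial state into this one.
    fn merge(&mut self, other: &LastValueState) {
        if other.is_set {
            self.last = other.last;
            self.is_set = true;
        }
    }

    fn evaluate(&self) -> Option<i64> {
        self.last
    }
}

fn main() {
    let mut lower = LastValueState::default();
    lower.update(&[Some(1), Some(2), None, Some(3)]);

    let mut upper = LastValueState::default();
    upper.update(&[Some(4)]);

    // Merging the two partial states yields the overall last value.
    lower.merge(&upper);
    assert_eq!(lower.evaluate(), Some(4));
}
```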
|
||||
|
||||
/// For testing whether the UDAF state fields are correctly implemented,
/// especially for our own custom UDAFs' state fields,
/// by comparing evaluation results before and after the split into state/merge functions.
|
||||
@@ -548,6 +752,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
input_schema: SchemaRef,
|
||||
input: Vec<ArrayRef>,
|
||||
expected_output: Option<ScalarValue>,
|
||||
// extra check function on the final array result
|
||||
expected_fn: Option<ExpectedFn>,
|
||||
distinct: bool,
|
||||
filter: Option<Box<Expr>>,
|
||||
@@ -578,6 +783,27 @@ async fn test_udaf_correct_eval_result() {
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: count_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"str_val",
|
||||
DataType::Utf8,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("str_val"))],
|
||||
input: vec![Arc::new(StringArray::from(vec![
|
||||
Some("hello"),
|
||||
Some("world"),
|
||||
None,
|
||||
Some("what"),
|
||||
]))],
|
||||
expected_output: Some(ScalarValue::Int64(Some(3))),
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: avg_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
|
||||
@@ -280,6 +280,8 @@ fn build_struct(
|
||||
&self,
|
||||
args: datafusion::logical_expr::ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
|
||||
use common_error::ext::ErrorExt;
|
||||
|
||||
let columns = args.args
|
||||
.iter()
|
||||
.map(|arg| {
|
||||
@@ -293,7 +295,7 @@ fn build_struct(
|
||||
})
|
||||
})
|
||||
.collect::<common_query::error::Result<Vec<_>>>()
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Column conversion error: {}", e)))?;
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Column conversion error: {}", e.output_msg())))?;
|
||||
|
||||
// Safety check: Ensure under the `greptime` catalog for security
|
||||
#user_path::ensure_greptime!(self.func_ctx);
|
||||
@@ -314,14 +316,14 @@ fn build_struct(
|
||||
.#handler
|
||||
.as_ref()
|
||||
.context(#snafu_type)
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Handler error: {}", e)))?;
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Handler error: {}", e.output_msg())))?;
|
||||
|
||||
let mut builder = store_api::storage::ConcreteDataType::#ret()
|
||||
.create_mutable_vector(rows_num);
|
||||
|
||||
if columns_num == 0 {
|
||||
let result = #fn_name(handler, query_ctx, &[]).await
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Function execution error: {}", e)))?;
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Function execution error: {}", e.output_msg())))?;
|
||||
|
||||
builder.push_value_ref(result.as_value_ref());
|
||||
} else {
|
||||
@@ -331,7 +333,7 @@ fn build_struct(
|
||||
.collect();
|
||||
|
||||
let result = #fn_name(handler, query_ctx, &args).await
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Function execution error: {}", e)))?;
|
||||
.map_err(|e| datafusion_common::DataFusionError::Execution(format!("Function execution error: {}", e.output_msg())))?;
|
||||
|
||||
builder.push_value_ref(result.as_value_ref());
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ workspace = true
|
||||
async-trait.workspace = true
|
||||
bytes.workspace = true
|
||||
common-error.workspace = true
|
||||
common-function.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
datafusion.workspace = true
|
||||
|
||||
@@ -16,9 +16,13 @@ use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use common_function::aggrs::aggr_wrapper::fix_order::{
|
||||
FixStateUdafOrderingAnalyzer, UnFixStateUdafOrderingAnalyzer,
|
||||
};
|
||||
use datafusion::execution::context::SessionState;
|
||||
use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
use datafusion::execution::SessionStateBuilder;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion::prelude::SessionConfig;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
|
||||
@@ -47,6 +51,9 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
let df_plan = from_substrait_plan(&state, &plan)
|
||||
.await
|
||||
.context(DecodeDfPlanSnafu)?;
|
||||
let df_plan = FixStateUdafOrderingAnalyzer {}
|
||||
.analyze(df_plan, state.config_options())
|
||||
.context(DecodeDfPlanSnafu)?;
|
||||
Ok(df_plan)
|
||||
}
|
||||
|
||||
@@ -55,8 +62,11 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
plan: &Self::Plan,
|
||||
serializer: impl SerializerRegistry + 'static,
|
||||
) -> Result<Bytes, Self::Error> {
|
||||
let plan = UnFixStateUdafOrderingAnalyzer {}
|
||||
.analyze(plan.clone(), &Default::default())
|
||||
.context(EncodeDfPlanSnafu)?;
|
||||
let mut buf = BytesMut::new();
|
||||
let substrait_plan = self.to_sub_plan(plan, serializer)?;
|
||||
let substrait_plan = self.to_sub_plan(&plan, serializer)?;
|
||||
substrait_plan.encode(&mut buf).context(EncodeRelSnafu)?;
|
||||
|
||||
Ok(buf.freeze())
|
||||
|
||||
@@ -376,34 +376,16 @@ impl Instance {
|
||||
ctx: QueryContextRef,
|
||||
) -> server_error::Result<bool> {
|
||||
let db_string = ctx.get_db_string();
|
||||
// fast cache check
|
||||
let cache = self
|
||||
.otlp_metrics_table_legacy_cache
|
||||
.entry(db_string)
|
||||
.entry(db_string.clone())
|
||||
.or_default();
|
||||
|
||||
// check cache
|
||||
let hit_cache = names
|
||||
.iter()
|
||||
.filter_map(|name| cache.get(*name))
|
||||
.collect::<Vec<_>>();
|
||||
if !hit_cache.is_empty() {
|
||||
let hit_legacy = hit_cache.iter().any(|en| *en.value());
|
||||
let hit_prom = hit_cache.iter().any(|en| !*en.value());
|
||||
|
||||
// hit but have true and false, means both legacy and new mode are used
|
||||
// we cannot handle this case, so return error
|
||||
// add doc links in err msg later
|
||||
ensure!(!(hit_legacy && hit_prom), OtlpMetricModeIncompatibleSnafu);
|
||||
|
||||
let flag = hit_legacy;
|
||||
// set cache for all names
|
||||
names.iter().for_each(|name| {
|
||||
if !cache.contains_key(*name) {
|
||||
cache.insert(name.to_string(), flag);
|
||||
}
|
||||
});
|
||||
if let Some(flag) = fast_legacy_check(&cache, names)? {
|
||||
return Ok(flag);
|
||||
}
|
||||
// release cache reference to avoid lock contention
|
||||
drop(cache);
|
||||
|
||||
let catalog = ctx.current_catalog();
|
||||
let schema = ctx.current_schema();
|
||||
@@ -430,7 +412,10 @@ impl Instance {
|
||||
|
||||
// means no existing table is found, use new mode
|
||||
if table_ids.is_empty() {
|
||||
// set cache
|
||||
let cache = self
|
||||
.otlp_metrics_table_legacy_cache
|
||||
.entry(db_string)
|
||||
.or_default();
|
||||
names.iter().for_each(|name| {
|
||||
cache.insert(name.to_string(), false);
|
||||
});
|
||||
@@ -455,6 +440,10 @@ impl Instance {
|
||||
.unwrap_or(&OTLP_LEGACY_DEFAULT_VALUE)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let cache = self
|
||||
.otlp_metrics_table_legacy_cache
|
||||
.entry(db_string)
|
||||
.or_default();
|
||||
if !options.is_empty() {
|
||||
// check value consistency
|
||||
let has_prom = options.iter().any(|opt| *opt == OTLP_METRIC_COMPAT_PROM);
|
||||
@@ -477,6 +466,39 @@ impl Instance {
|
||||
}
|
||||
}
|
||||
|
||||
fn fast_legacy_check(
|
||||
cache: &DashMap<String, bool>,
|
||||
names: &[&String],
|
||||
) -> server_error::Result<Option<bool>> {
|
||||
let hit_cache = names
|
||||
.iter()
|
||||
.filter_map(|name| cache.get(*name))
|
||||
.collect::<Vec<_>>();
|
||||
if !hit_cache.is_empty() {
|
||||
let hit_legacy = hit_cache.iter().any(|en| *en.value());
|
||||
let hit_prom = hit_cache.iter().any(|en| !*en.value());
|
||||
|
||||
// A mix of true and false hits means both legacy and new modes are in use.
// We cannot handle this case, so return an error.
// TODO: add doc links to the error message later.
|
||||
ensure!(!(hit_legacy && hit_prom), OtlpMetricModeIncompatibleSnafu);
|
||||
|
||||
let flag = hit_legacy;
|
||||
// drop hit_cache to release references before inserting to avoid deadlock
|
||||
drop(hit_cache);
|
||||
|
||||
// set cache for all names
|
||||
names.iter().for_each(|name| {
|
||||
if !cache.contains_key(*name) {
|
||||
cache.insert(name.to_string(), flag);
|
||||
}
|
||||
});
|
||||
Ok(Some(flag))
} else {
Ok(None)
}
}
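The `drop(hit_cache)` above is what keeps `fast_legacy_check` deadlock-free: `DashMap::get` holds a read lock on its shard, while `insert`/`entry` on the same shard needs a write lock. A minimal sketch of that pattern follows, assuming only the `dashmap` crate; the function and key names are illustrative, not the GreptimeDB code.

```rust
// Minimal sketch of the DashMap pattern above (assumes only the `dashmap` crate).
use dashmap::DashMap;

fn mark_all(cache: &DashMap<String, bool>, names: &[&str]) {
    // Each `get` guard holds a read lock on its shard.
    let hits: Vec<_> = names.iter().filter_map(|n| cache.get(*n)).collect();
    let flag = hits.iter().any(|guard| *guard.value());

    // Release the read guards before inserting: `insert` needs a write lock on
    // the shard, so holding a guard across it can deadlock on the same thread.
    drop(hits);

    for name in names {
        if !cache.contains_key(*name) {
            cache.insert((*name).to_string(), flag);
        }
    }
}

fn main() {
    let cache = DashMap::new();
    cache.insert("metric1".to_string(), true);
    mark_all(&cache, &["metric1", "metric2"]);
    assert_eq!(*cache.get("metric2").unwrap().value(), true);
}
```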
|
||||
|
||||
/// If the relevant variables are set, the timeout is enforced for all PostgreSQL statements.
|
||||
/// For MySQL, it applies only to read-only statements.
|
||||
fn derive_timeout(stmt: &Statement, query_ctx: &QueryContextRef) -> Option<Duration> {
|
||||
@@ -1039,6 +1061,10 @@ fn should_capture_statement(stmt: Option<&Statement>) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Barrier};
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_base::Plugins;
|
||||
use query::query_engine::options::QueryOptions;
|
||||
@@ -1048,6 +1074,122 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_fast_legacy_check_deadlock_prevention() {
|
||||
// Create a DashMap to simulate the cache
|
||||
let cache = DashMap::new();
|
||||
|
||||
// Pre-populate cache with some entries
|
||||
cache.insert("metric1".to_string(), true); // legacy mode
|
||||
cache.insert("metric2".to_string(), false); // prom mode
|
||||
cache.insert("metric3".to_string(), true); // legacy mode
|
||||
|
||||
// Test case 1: Normal operation with cache hits
|
||||
let metric1 = "metric1".to_string();
|
||||
let metric4 = "metric4".to_string();
|
||||
let names1 = vec![&metric1, &metric4];
|
||||
let result = fast_legacy_check(&cache, &names1);
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(result.unwrap(), Some(true)); // should return legacy mode
|
||||
|
||||
// Verify that metric4 was added to cache
|
||||
assert!(cache.contains_key("metric4"));
|
||||
assert!(*cache.get("metric4").unwrap().value());
|
||||
|
||||
// Test case 2: No cache hits
|
||||
let metric5 = "metric5".to_string();
|
||||
let metric6 = "metric6".to_string();
|
||||
let names2 = vec![&metric5, &metric6];
|
||||
let result = fast_legacy_check(&cache, &names2);
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(result.unwrap(), None); // should return None as no cache hits
|
||||
|
||||
// Test case 3: Incompatible modes should return error
|
||||
let cache_incompatible = DashMap::new();
|
||||
cache_incompatible.insert("metric1".to_string(), true); // legacy
|
||||
cache_incompatible.insert("metric2".to_string(), false); // prom
|
||||
let metric1_test = "metric1".to_string();
|
||||
let metric2_test = "metric2".to_string();
|
||||
let names3 = vec![&metric1_test, &metric2_test];
|
||||
let result = fast_legacy_check(&cache_incompatible, &names3);
|
||||
assert!(result.is_err()); // should error due to incompatible modes
|
||||
|
||||
// Test case 4: Intensive concurrent access to test deadlock prevention
|
||||
// This test specifically targets the scenario where multiple threads
|
||||
// access the same cache entries simultaneously
|
||||
let cache_concurrent = Arc::new(DashMap::new());
|
||||
cache_concurrent.insert("shared_metric".to_string(), true);
|
||||
|
||||
let num_threads = 8;
|
||||
let operations_per_thread = 100;
|
||||
let barrier = Arc::new(Barrier::new(num_threads));
|
||||
let success_flag = Arc::new(AtomicBool::new(true));
|
||||
|
||||
let handles: Vec<_> = (0..num_threads)
|
||||
.map(|thread_id| {
|
||||
let cache_clone = Arc::clone(&cache_concurrent);
|
||||
let barrier_clone = Arc::clone(&barrier);
|
||||
let success_flag_clone = Arc::clone(&success_flag);
|
||||
|
||||
thread::spawn(move || {
|
||||
// Wait for all threads to be ready
|
||||
barrier_clone.wait();
|
||||
|
||||
let start_time = Instant::now();
|
||||
for i in 0..operations_per_thread {
|
||||
// Each operation references existing cache entry and adds new ones
|
||||
let shared_metric = "shared_metric".to_string();
|
||||
let new_metric = format!("thread_{}_metric_{}", thread_id, i);
|
||||
let names = vec![&shared_metric, &new_metric];
|
||||
|
||||
match fast_legacy_check(&cache_clone, &names) {
|
||||
Ok(_) => {}
|
||||
Err(_) => {
|
||||
success_flag_clone.store(false, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If the test takes too long, it likely means deadlock
|
||||
if start_time.elapsed() > Duration::from_secs(10) {
|
||||
success_flag_clone.store(false, Ordering::Relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Join all threads with timeout
|
||||
let start_time = Instant::now();
|
||||
for (i, handle) in handles.into_iter().enumerate() {
|
||||
let join_result = handle.join();
|
||||
|
||||
// Check if we're taking too long (potential deadlock)
|
||||
if start_time.elapsed() > Duration::from_secs(30) {
|
||||
panic!("Test timed out - possible deadlock detected!");
|
||||
}
|
||||
|
||||
if join_result.is_err() {
|
||||
panic!("Thread {} panicked during execution", i);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify all operations completed successfully
|
||||
assert!(
|
||||
success_flag.load(Ordering::Relaxed),
|
||||
"Some operations failed"
|
||||
);
|
||||
|
||||
// Verify that many new entries were added (proving operations completed)
|
||||
let final_count = cache_concurrent.len();
|
||||
assert!(
|
||||
final_count > 1 + num_threads * operations_per_thread / 2,
|
||||
"Expected more cache entries, got {}",
|
||||
final_count
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exec_validation() {
|
||||
let query_ctx = QueryContext::arc();
|
||||
|
||||
@@ -461,6 +461,7 @@ fn build_connection_options(tls_config: Option<&TlsOption>) -> Result<Option<Con
|
||||
if matches!(tls_config.mode, TlsMode::Disable) {
|
||||
return Ok(None);
|
||||
}
|
||||
info!("Creating etcd client with TLS mode: {:?}", tls_config.mode);
|
||||
let mut etcd_tls_opts = TlsOptions::new();
|
||||
// Set CA certificate if provided
|
||||
if !tls_config.ca_cert_path.is_empty() {
|
||||
|
||||
@@ -784,6 +784,10 @@ impl Metasrv {
|
||||
&self.plugins
|
||||
}
|
||||
|
||||
pub fn started(&self) -> Arc<AtomicBool> {
|
||||
self.started.clone()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn new_ctx(&self) -> Context {
|
||||
let server_addr = self.options().grpc.server_addr.clone();
|
||||
|
||||
@@ -127,10 +127,10 @@ mod tests {
|
||||
r#"
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3157, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3157, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3201, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3185, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3429, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3413, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3157, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10) }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3185, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }"#
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3413, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4) }"#
|
||||
);
|
||||
|
||||
// list from storage
|
||||
|
||||
@@ -19,7 +19,7 @@ common-recordbatch.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
datatypes.workspace = true
|
||||
memcomparable = "0.2"
|
||||
memcomparable = { git = "https://github.com/v0y4g3r/memcomparable.git", rev = "a07122dc03556bbd88ad66234cbea7efd3b23efb" }
|
||||
paste.workspace = true
|
||||
serde.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
@@ -50,7 +50,6 @@ index.workspace = true
|
||||
itertools.workspace = true
|
||||
lazy_static = "1.4"
|
||||
log-store = { workspace = true }
|
||||
memcomparable = "0.2"
|
||||
mito-codec.workspace = true
|
||||
moka = { workspace = true, features = ["sync", "future"] }
|
||||
object-store.workspace = true
|
||||
|
||||
@@ -175,6 +175,10 @@ impl FileGroup {
|
||||
pub(crate) fn into_files(self) -> impl Iterator<Item = FileHandle> {
|
||||
self.files.into_iter()
|
||||
}
|
||||
|
||||
pub(crate) fn is_all_level_0(&self) -> bool {
|
||||
self.files.iter().all(|f| f.level() == 0)
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for FileGroup {
|
||||
|
||||
@@ -42,6 +42,25 @@ pub fn new_file_handle_with_sequence(
|
||||
end_ts_millis: i64,
|
||||
level: Level,
|
||||
sequence: u64,
|
||||
) -> FileHandle {
|
||||
new_file_handle_with_size_and_sequence(
|
||||
file_id,
|
||||
start_ts_millis,
|
||||
end_ts_millis,
|
||||
level,
|
||||
sequence,
|
||||
0,
|
||||
)
|
||||
}
|
||||
|
||||
/// Test util to create file handles with custom size.
|
||||
pub fn new_file_handle_with_size_and_sequence(
|
||||
file_id: FileId,
|
||||
start_ts_millis: i64,
|
||||
end_ts_millis: i64,
|
||||
level: Level,
|
||||
sequence: u64,
|
||||
file_size: u64,
|
||||
) -> FileHandle {
|
||||
let file_purger = new_noop_file_purger();
|
||||
FileHandle::new(
|
||||
@@ -53,7 +72,7 @@ pub fn new_file_handle_with_sequence(
|
||||
Timestamp::new_millisecond(end_ts_millis),
|
||||
),
|
||||
level,
|
||||
file_size: 0,
|
||||
file_size,
|
||||
available_indexes: Default::default(),
|
||||
index_file_size: 0,
|
||||
num_rows: 0,
|
||||
|
||||
@@ -64,11 +64,32 @@ impl TwcsPicker {
|
||||
continue;
|
||||
}
|
||||
let mut files_to_merge: Vec<_> = files.files().cloned().collect();
|
||||
|
||||
// Filter out large files in append mode - they won't benefit from compaction
|
||||
if self.append_mode {
|
||||
if let Some(max_size) = self.max_output_file_size {
|
||||
let (kept_files, ignored_files) = files_to_merge
|
||||
.into_iter()
|
||||
.partition(|fg| fg.size() <= max_size as usize && fg.is_all_level_0());
|
||||
files_to_merge = kept_files;
|
||||
info!(
|
||||
"Skipped {} large files in append mode for region {}, window {}, max_size: {}",
|
||||
ignored_files.len(),
|
||||
region_id,
|
||||
window,
|
||||
max_size
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let sorted_runs = find_sorted_runs(&mut files_to_merge);
|
||||
let found_runs = sorted_runs.len();
|
||||
// We only remove deletion markers if we found no more than 2 runs and we are not in append mode,
// because after compaction there will be no overlapping files.
|
||||
let filter_deleted = !files.overlapping && found_runs <= 2 && !self.append_mode;
|
||||
if found_runs == 0 {
|
||||
return output;
|
||||
}
|
||||
|
||||
let inputs = if found_runs > 1 {
|
||||
reduce_runs(sorted_runs)
|
||||
@@ -330,7 +351,9 @@ mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
use crate::compaction::test_util::{new_file_handle, new_file_handle_with_sequence};
|
||||
use crate::compaction::test_util::{
|
||||
new_file_handle, new_file_handle_with_sequence, new_file_handle_with_size_and_sequence,
|
||||
};
|
||||
use crate::sst::file::{FileId, Level};
|
||||
|
||||
#[test]
|
||||
@@ -766,5 +789,45 @@ mod tests {
|
||||
.check();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_append_mode_filter_large_files() {
|
||||
let file_ids = (0..4).map(|_| FileId::random()).collect::<Vec<_>>();
|
||||
let max_output_file_size = 1000u64;
|
||||
|
||||
// Create files with different sizes
|
||||
let small_file_1 = new_file_handle_with_size_and_sequence(file_ids[0], 0, 999, 0, 1, 500);
|
||||
let large_file_1 = new_file_handle_with_size_and_sequence(file_ids[1], 0, 999, 0, 2, 1500);
|
||||
let small_file_2 = new_file_handle_with_size_and_sequence(file_ids[2], 0, 999, 0, 3, 800);
|
||||
let large_file_2 = new_file_handle_with_size_and_sequence(file_ids[3], 0, 999, 0, 4, 2000);
|
||||
|
||||
// Create file groups (each file is in its own group due to different sequences)
|
||||
let mut files_to_merge = vec![
|
||||
FileGroup::new_with_file(small_file_1),
|
||||
FileGroup::new_with_file(large_file_1),
|
||||
FileGroup::new_with_file(small_file_2),
|
||||
FileGroup::new_with_file(large_file_2),
|
||||
];
|
||||
|
||||
// Test filtering logic directly
|
||||
let original_count = files_to_merge.len();
|
||||
|
||||
// Apply append mode filtering
|
||||
files_to_merge.retain(|fg| fg.size() <= max_output_file_size as usize);
|
||||
|
||||
// Should have filtered out 2 large files, leaving 2 small files
|
||||
assert_eq!(files_to_merge.len(), 2);
|
||||
assert_eq!(original_count, 4);
|
||||
|
||||
// Verify the remaining files are the small ones
|
||||
for fg in &files_to_merge {
|
||||
assert!(
|
||||
fg.size() <= max_output_file_size as usize,
|
||||
"File size {} should be <= {}",
|
||||
fg.size(),
|
||||
max_output_file_size
|
||||
);
|
||||
}
|
||||
}
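For reference, the append-mode rule the picker applies can be restated without the `FileGroup` machinery. This is a simplified sketch using plain `(file_size, level)` tuples; the names are illustrative only.

```rust
// Simplified restatement of the append-mode filter with plain (file_size, level) tuples.
fn keep_for_compaction(
    files: Vec<(u64, u8)>,
    max_output_file_size: u64,
) -> (Vec<(u64, u8)>, Vec<(u64, u8)>) {
    files
        .into_iter()
        .partition(|(size, level)| *size <= max_output_file_size && *level == 0)
}

fn main() {
    let files = vec![(500, 0), (1500, 0), (800, 0), (2000, 1)];
    let (kept, ignored) = keep_for_compaction(files, 1000);

    let expected_kept: Vec<(u64, u8)> = vec![(500, 0), (800, 0)];
    let expected_ignored: Vec<(u64, u8)> = vec![(1500, 0), (2000, 1)];
    assert_eq!(kept, expected_kept);
    assert_eq!(ignored, expected_ignored);
}
```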
|
||||
|
||||
// TODO(hl): TTL tester that checks if get_expired_ssts function works as expected.
|
||||
}
|
||||
|
||||
@@ -53,6 +53,8 @@ mod prune_test;
|
||||
#[cfg(test)]
|
||||
mod row_selector_test;
|
||||
#[cfg(test)]
|
||||
mod scan_corrupt;
|
||||
#[cfg(test)]
|
||||
mod scan_test;
|
||||
#[cfg(test)]
|
||||
mod set_role_state_test;
|
||||
|
||||
src/mito2/src/engine/scan_corrupt.rs (new file, 112 lines)
@@ -0,0 +1,112 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::helper::row;
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::Rows;
|
||||
use datatypes::value::Value;
|
||||
use mito_codec::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodec};
|
||||
use parquet::file::statistics::Statistics;
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{PathType, RegionRequest};
|
||||
use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::config::MitoConfig;
|
||||
use crate::sst::parquet::reader::ParquetReaderBuilder;
|
||||
use crate::test_util;
|
||||
use crate::test_util::{CreateRequestBuilder, TestEnv};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_scan_corrupt() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut env = TestEnv::with_prefix("test_write_stats_with_long_string_value").await;
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let table_dir = request.table_dir.clone();
|
||||
let column_schemas = test_util::rows_schema(&request);
|
||||
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let build_rows = |start: i32, end: i32| {
|
||||
(start..end)
|
||||
.map(|i| {
|
||||
row(vec![
|
||||
ValueData::StringValue(i.to_string().repeat(128)),
|
||||
ValueData::F64Value(i as f64),
|
||||
ValueData::TimestampMillisecondValue(i as i64 * 1000),
|
||||
])
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
let put_rows = async |start, end| {
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(start, end),
|
||||
};
|
||||
test_util::put_rows(&engine, region_id, rows).await;
|
||||
test_util::flush_region(&engine, region_id, None).await;
|
||||
};
|
||||
put_rows(0, 3).await;
|
||||
|
||||
let region = engine.get_region(region_id).unwrap();
|
||||
|
||||
let version = region.version();
|
||||
let file = version
|
||||
.ssts
|
||||
.levels()
|
||||
.iter()
|
||||
.flat_map(|l| l.files.values())
|
||||
.next()
|
||||
.unwrap();
|
||||
|
||||
let object_store = env.get_object_store().unwrap();
|
||||
let reader = ParquetReaderBuilder::new(
|
||||
table_dir.clone(),
|
||||
PathType::Bare,
|
||||
file.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.build()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let codec = DensePrimaryKeyCodec::new(&version.metadata);
|
||||
for r in reader.parquet_metadata().row_groups() {
|
||||
for c in r.columns() {
|
||||
if c.column_descr().name() == PRIMARY_KEY_COLUMN_NAME {
|
||||
let stats = c.statistics().unwrap();
|
||||
let Statistics::ByteArray(b) = stats else {
|
||||
unreachable!()
|
||||
};
|
||||
let min = codec
|
||||
.decode_leftmost(b.min_bytes_opt().unwrap())
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(Value::String("0".repeat(128).into()), min);
|
||||
|
||||
let max = codec
|
||||
.decode_leftmost(b.max_bytes_opt().unwrap())
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(Value::String("2".repeat(128).into()), max);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -554,6 +554,8 @@ impl BulkPartEncoder {
|
||||
WriterProperties::builder()
|
||||
.set_write_batch_size(row_group_size)
|
||||
.set_max_row_group_size(row_group_size)
|
||||
.set_column_index_truncate_length(None)
|
||||
.set_statistics_truncate_length(None)
|
||||
.build(),
|
||||
);
|
||||
Self {
|
||||
|
||||
@@ -774,7 +774,9 @@ impl<'a> DataPartEncoder<'a> {
|
||||
.set_column_encoding(sequence_col.clone(), Encoding::DELTA_BINARY_PACKED)
|
||||
.set_column_dictionary_enabled(sequence_col, false)
|
||||
.set_column_encoding(op_type_col.clone(), Encoding::DELTA_BINARY_PACKED)
|
||||
.set_column_dictionary_enabled(op_type_col, true);
|
||||
.set_column_dictionary_enabled(op_type_col, true)
|
||||
.set_column_index_truncate_length(None)
|
||||
.set_statistics_truncate_length(None);
|
||||
builder.build()
|
||||
}
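Both encoder hunks above add the same two builder calls. Below is a small standalone sketch, assuming a recent `parquet` crate (the function name and row-group size are illustrative), of writer properties that disable statistics and column-index truncation so long primary-key byte arrays keep their exact min/max values.

```rust
// Sketch of writer properties that keep full-length statistics (assumes a recent `parquet` crate).
use parquet::basic::{Compression, ZstdLevel};
use parquet::file::properties::WriterProperties;

fn writer_props(row_group_size: usize) -> WriterProperties {
    WriterProperties::builder()
        .set_write_batch_size(row_group_size)
        .set_max_row_group_size(row_group_size)
        // `None` disables truncation, so min/max statistics keep the full byte
        // array instead of a truncated prefix (important for long primary keys).
        .set_column_index_truncate_length(None)
        .set_statistics_truncate_length(None)
        .set_compression(Compression::ZSTD(ZstdLevel::default()))
        .build()
}

fn main() {
    let props = writer_props(8192);
    assert_eq!(props.max_row_group_size(), 8192);
}
```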
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{Batch, ScannerMetrics};
|
||||
|
||||
/// Timeout to send a batch to a sender.
|
||||
const SEND_TIMEOUT: Duration = Duration::from_millis(10);
|
||||
const SEND_TIMEOUT: Duration = Duration::from_micros(100);
|
||||
|
||||
/// List of receivers.
|
||||
type ReceiverList = Vec<Option<Receiver<Result<SeriesBatch>>>>;
|
||||
|
||||
@@ -319,6 +319,10 @@ impl FileHandle {
|
||||
pub fn num_rows(&self) -> usize {
|
||||
self.inner.meta.num_rows as usize
|
||||
}
|
||||
|
||||
pub fn level(&self) -> Level {
|
||||
self.inner.meta.level
|
||||
}
|
||||
}
|
||||
|
||||
/// Inner data of [FileHandle].
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::ErrorKind;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -58,14 +59,20 @@ impl IntermediateManager {
|
||||
let aux_pb = PathBuf::from(aux_path.as_ref());
|
||||
let intm_dir = aux_pb.join(INTERMEDIATE_DIR);
|
||||
let deleted_dir = intm_dir.with_extension(format!("deleted-{}", Uuid::new_v4()));
|
||||
if let Err(err) = tokio::fs::rename(&intm_dir, &deleted_dir).await {
|
||||
warn!(err; "Failed to rename intermediate directory");
|
||||
}
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = tokio::fs::remove_dir_all(deleted_dir).await {
|
||||
warn!(err; "Failed to remove intermediate directory");
|
||||
match tokio::fs::rename(&intm_dir, &deleted_dir).await {
|
||||
Ok(_) => {
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = tokio::fs::remove_dir_all(deleted_dir).await {
|
||||
warn!(err; "Failed to remove intermediate directory");
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
Err(err) => {
|
||||
if err.kind() != ErrorKind::NotFound {
|
||||
warn!(err; "Failed to rename intermediate directory");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let store = new_fs_cache_store(&normalize_dir(aux_path.as_ref())).await?;
|
||||
let store = InstrumentedStore::new(store);
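The rename-then-delete change above follows a common cleanup pattern: move the directory out of the way synchronously, delete it in the background, and treat `NotFound` as a non-event rather than a warning. A standalone sketch follows, assuming `tokio` and `uuid`; the paths and function name are hypothetical.

```rust
// Sketch of the rename-then-async-delete cleanup (assumes `tokio` and `uuid`; paths are hypothetical).
use std::io::ErrorKind;
use std::path::PathBuf;

async fn recycle_dir(dir: PathBuf) {
    let deleted = dir.with_extension(format!("deleted-{}", uuid::Uuid::new_v4()));
    match tokio::fs::rename(&dir, &deleted).await {
        Ok(_) => {
            // The rename is cheap; run the potentially slow recursive delete in the background.
            tokio::spawn(async move {
                if let Err(err) = tokio::fs::remove_dir_all(&deleted).await {
                    eprintln!("failed to remove {}: {err}", deleted.display());
                }
            });
        }
        // A missing directory just means there is nothing to clean up.
        Err(err) if err.kind() == ErrorKind::NotFound => {}
        Err(err) => eprintln!("failed to rename {}: {err}", dir.display()),
    }
}

#[tokio::main]
async fn main() {
    recycle_dir(PathBuf::from("/tmp/example-intermediate")).await;
}
```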
|
||||
|
||||
@@ -329,7 +329,9 @@ where
|
||||
.set_key_value_metadata(Some(vec![key_value_meta]))
|
||||
.set_compression(Compression::ZSTD(ZstdLevel::default()))
|
||||
.set_encoding(Encoding::PLAIN)
|
||||
.set_max_row_group_size(opts.row_group_size);
|
||||
.set_max_row_group_size(opts.row_group_size)
|
||||
.set_column_index_truncate_length(None)
|
||||
.set_statistics_truncate_length(None);
|
||||
|
||||
let props_builder = Self::customize_column_config(props_builder, &self.metadata);
|
||||
let writer_props = props_builder.build();
|
||||
|
||||
@@ -605,7 +605,13 @@ impl RangeManipulateStream {
|
||||
|
||||
// shorten the range to calculate
|
||||
let first_ts = ts_column.value(0);
|
||||
let first_ts_aligned = (first_ts / self.interval) * self.interval;
// Preserve the query's alignment pattern when optimizing start time
let remainder = (first_ts - self.start).rem_euclid(self.interval);
let first_ts_aligned = if remainder == 0 {
    first_ts
} else {
    first_ts + (self.interval - remainder)
};
let last_ts = ts_column.value(ts_column.len() - 1);
let last_ts_aligned = ((last_ts + self.range) / self.interval) * self.interval;
let start = self.start.max(first_ts_aligned);
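The arithmetic above can be checked in isolation. A small sketch with hypothetical values (mirroring the unit test further down): pick the smallest timestamp at or after `first_ts` that keeps the same phase as the query start, i.e. `ts % interval == start % interval`.

```rust
// Standalone check of the alignment math (values mirror the unit test below).
fn align_up(first_ts: i64, start: i64, interval: i64) -> i64 {
    let remainder = (first_ts - start).rem_euclid(interval);
    if remainder == 0 {
        first_ts
    } else {
        first_ts + (interval - remainder)
    }
}

fn main() {
    // Query starts at ...4000 with a 30s step; the data begins at ...0000.
    let start = 1_758_093_274_000_i64;
    let interval = 30_000_i64;
    let first_ts = 1_758_093_260_000_i64;

    let aligned = align_up(first_ts, start, interval);
    assert_eq!(aligned % interval, start % interval); // phase of the query start is preserved
    assert!(aligned >= first_ts && aligned - first_ts < interval); // moved forward by less than one step
}
```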
|
||||
@@ -671,6 +677,7 @@ mod test {
|
||||
use datafusion::datasource::source::DataSourceExec;
|
||||
use datafusion::physical_expr::Partitioning;
|
||||
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
|
||||
use datafusion::physical_plan::memory::MemoryStream;
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datatypes::arrow::array::TimestampMillisecondArray;
|
||||
|
||||
@@ -832,4 +839,66 @@ mod test {
|
||||
}");
|
||||
do_normalize_test(1, 10_001, 3_000, 1_000, expected).await;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_calculate_range_preserves_alignment() {
|
||||
// Test case: query starts at timestamp ending in 4000, step is 30s
|
||||
// Data starts at different alignment - should preserve query's 4000 pattern
|
||||
let schema = Arc::new(Schema::new(vec![Field::new(
|
||||
"timestamp",
|
||||
TimestampMillisecondType::DATA_TYPE,
|
||||
false,
|
||||
)]));
|
||||
let empty_stream = MemoryStream::try_new(vec![], schema.clone(), None).unwrap();
|
||||
|
||||
let stream = RangeManipulateStream {
|
||||
start: 1758093274000, // ends in 4000
|
||||
end: 1758093334000, // ends in 4000
|
||||
interval: 30000, // 30s step
|
||||
range: 60000, // 60s lookback
|
||||
time_index: 0,
|
||||
field_columns: vec![],
|
||||
aligned_ts_array: Arc::new(TimestampMillisecondArray::from(vec![0i64; 0])),
|
||||
output_schema: schema.clone(),
|
||||
input: Box::pin(empty_stream),
|
||||
metric: BaselineMetrics::new(&ExecutionPlanMetricsSet::new(), 0),
|
||||
num_series: Count::new(),
|
||||
};
|
||||
|
||||
// Create test data with timestamps not aligned to query pattern
|
||||
let test_timestamps = vec![
|
||||
1758093260000, // ends in 0000 (different alignment)
|
||||
1758093290000, // ends in 0000
|
||||
1758093320000, // ends in 0000
|
||||
];
|
||||
let ts_array = TimestampMillisecondArray::from(test_timestamps);
|
||||
let test_schema = Arc::new(Schema::new(vec![Field::new(
|
||||
"timestamp",
|
||||
TimestampMillisecondType::DATA_TYPE,
|
||||
false,
|
||||
)]));
|
||||
let batch = RecordBatch::try_new(test_schema, vec![Arc::new(ts_array)]).unwrap();
|
||||
|
||||
let (ranges, (start, end)) = stream.calculate_range(&batch).unwrap();
|
||||
|
||||
// Verify the optimized start preserves query alignment (should end in 4000)
|
||||
assert_eq!(
|
||||
start % 30000,
|
||||
1758093274000 % 30000,
|
||||
"Optimized start should preserve query alignment pattern"
|
||||
);
|
||||
|
||||
// Verify we generate correct number of ranges for the alignment
|
||||
let expected_timestamps: Vec<i64> = (start..=end).step_by(30000).collect();
|
||||
assert_eq!(ranges.len(), expected_timestamps.len());
|
||||
|
||||
// Verify all generated timestamps maintain the same alignment pattern
|
||||
for ts in expected_timestamps {
|
||||
assert_eq!(
|
||||
ts % 30000,
|
||||
1758093274000 % 30000,
|
||||
"All timestamps should maintain query alignment pattern"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
@@ -32,10 +32,12 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::dist_plan::analyzer::utils::{aliased_columns_for, rewrite_merge_sort_exprs};
|
||||
use crate::dist_plan::commutativity::{
|
||||
partial_commutative_transformer, Categorizer, Commutativity,
|
||||
};
|
||||
use crate::dist_plan::merge_scan::MergeScanLogicalPlan;
|
||||
use crate::dist_plan::merge_sort::MergeSortLogicalPlan;
|
||||
use crate::metrics::PUSH_DOWN_FALLBACK_ERRORS_TOTAL;
|
||||
use crate::plan::ExtractExpr;
|
||||
use crate::query_engine::DefaultSerializer;
|
||||
@@ -46,7 +48,10 @@ mod test;
|
||||
mod fallback;
|
||||
mod utils;
|
||||
|
||||
pub(crate) use utils::{AliasMapping, AliasTracker};
|
||||
pub(crate) use utils::AliasMapping;
|
||||
|
||||
/// Placeholder for other physical partition columns that are not in logical table
|
||||
const OTHER_PHY_PART_COL_PLACEHOLDER: &str = "__OTHER_PHYSICAL_PART_COLS_PLACEHOLDER__";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DistPlannerOptions {
|
||||
@@ -229,8 +234,7 @@ struct PlanRewriter {
|
||||
stage: Vec<LogicalPlan>,
|
||||
status: RewriterStatus,
|
||||
/// Partition columns of the table in current pass
|
||||
partition_cols: Option<Vec<String>>,
|
||||
alias_tracker: Option<AliasTracker>,
|
||||
partition_cols: Option<AliasMapping>,
|
||||
/// use stack count as scope to determine column requirements is needed or not
|
||||
/// i.e for a logical plan like:
|
||||
/// ```ignore
|
||||
@@ -288,7 +292,7 @@ impl PlanRewriter {
|
||||
}
|
||||
|
||||
/// Return true if should stop and expand. The input plan is the parent node of current node
|
||||
fn should_expand(&mut self, plan: &LogicalPlan) -> bool {
|
||||
fn should_expand(&mut self, plan: &LogicalPlan) -> DfResult<bool> {
|
||||
debug!(
|
||||
"Check should_expand at level: {} with Stack:\n{}, ",
|
||||
self.level,
|
||||
@@ -298,20 +302,21 @@ impl PlanRewriter {
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
);
|
||||
if DFLogicalSubstraitConvertor
|
||||
.encode(plan, DefaultSerializer)
|
||||
.is_err()
|
||||
{
|
||||
return true;
|
||||
if let Err(e) = DFLogicalSubstraitConvertor.encode(plan, DefaultSerializer) {
|
||||
debug!(
|
||||
"PlanRewriter: plan cannot be converted to substrait with error={e:?}, expanding now: {plan}"
|
||||
);
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
if self.expand_on_next_call {
|
||||
self.expand_on_next_call = false;
|
||||
return true;
|
||||
debug!("PlanRewriter: expand_on_next_call is true, expanding now");
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
if self.expand_on_next_part_cond_trans_commutative {
|
||||
let comm = Categorizer::check_plan(plan, self.get_aliased_partition_columns());
|
||||
let comm = Categorizer::check_plan(plan, self.partition_cols.clone())?;
|
||||
match comm {
|
||||
Commutativity::PartialCommutative => {
|
||||
// a small difference is that for partial commutative, we still need to
|
||||
@@ -327,13 +332,16 @@ impl PlanRewriter {
|
||||
// again a new node that can be push down, we should just
|
||||
// do push down now and avoid further expansion
|
||||
self.expand_on_next_part_cond_trans_commutative = false;
|
||||
return true;
|
||||
debug!(
|
||||
"PlanRewriter: meet a new conditional/transformed commutative plan, expanding now: {plan}"
|
||||
);
|
||||
return Ok(true);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
match Categorizer::check_plan(plan, self.get_aliased_partition_columns()) {
|
||||
match Categorizer::check_plan(plan, self.partition_cols.clone())? {
|
||||
Commutativity::Commutative => {}
|
||||
Commutativity::PartialCommutative => {
|
||||
if let Some(plan) = partial_commutative_transformer(plan) {
|
||||
@@ -354,9 +362,8 @@ impl PlanRewriter {
|
||||
}
|
||||
}
|
||||
Commutativity::TransformedCommutative { transformer } => {
|
||||
if let Some(transformer) = transformer
|
||||
&& let Some(transformer_actions) = transformer(plan)
|
||||
{
|
||||
if let Some(transformer) = transformer {
|
||||
let transformer_actions = transformer(plan)?;
|
||||
debug!(
|
||||
"PlanRewriter: transformed plan: {}\n from {plan}",
|
||||
transformer_actions
|
||||
@@ -387,11 +394,12 @@ impl PlanRewriter {
|
||||
Commutativity::NonCommutative
|
||||
| Commutativity::Unimplemented
|
||||
| Commutativity::Unsupported => {
|
||||
return true;
|
||||
debug!("PlanRewriter: meet a non-commutative plan, expanding now: {plan}");
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
/// Update the column requirements for the current plan, plan_level is the level of the plan
|
||||
@@ -427,49 +435,31 @@ impl PlanRewriter {
|
||||
self.status = RewriterStatus::Unexpanded;
|
||||
}
|
||||
|
||||
/// Maybe update alias for original table columns in the plan
|
||||
fn maybe_update_alias(&mut self, node: &LogicalPlan) {
|
||||
if let Some(alias_tracker) = &mut self.alias_tracker {
|
||||
alias_tracker.update_alias(node);
|
||||
debug!(
|
||||
"Current partition columns are: {:?}",
|
||||
self.get_aliased_partition_columns()
|
||||
);
|
||||
} else if let LogicalPlan::TableScan(table_scan) = node {
|
||||
self.alias_tracker = AliasTracker::new(table_scan);
|
||||
debug!(
|
||||
"Initialize partition columns: {:?} with table={}",
|
||||
self.get_aliased_partition_columns(),
|
||||
table_scan.table_name
|
||||
);
|
||||
}
|
||||
}
|
||||
fn maybe_set_partitions(&mut self, plan: &LogicalPlan) -> DfResult<()> {
|
||||
if let Some(part_cols) = &mut self.partition_cols {
|
||||
// update partition alias
|
||||
let child = plan.inputs().first().cloned().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"PlanRewriter: maybe_set_partitions: plan has no child: {plan}"
|
||||
))
|
||||
})?;
|
||||
|
||||
fn get_aliased_partition_columns(&self) -> Option<AliasMapping> {
|
||||
if let Some(part_cols) = self.partition_cols.as_ref() {
|
||||
let Some(alias_tracker) = &self.alias_tracker else {
|
||||
// no alias tracker meaning no table scan encountered
|
||||
return None;
|
||||
};
|
||||
let mut aliased = HashMap::new();
|
||||
for part_col in part_cols {
|
||||
let all_alias = alias_tracker
|
||||
.get_all_alias_for_col(part_col)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
aliased.insert(part_col.clone(), all_alias);
|
||||
for (_col_name, alias_set) in part_cols.iter_mut() {
|
||||
let aliased_cols = aliased_columns_for(
|
||||
&alias_set.clone().into_iter().collect(),
|
||||
plan,
|
||||
Some(child),
|
||||
)?;
|
||||
*alias_set = aliased_cols.into_values().flatten().collect();
|
||||
}
|
||||
Some(aliased)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_set_partitions(&mut self, plan: &LogicalPlan) {
|
||||
if self.partition_cols.is_some() {
|
||||
// only need to set once
|
||||
return;
|
||||
debug!(
|
||||
"PlanRewriter: maybe_set_partitions: updated partition columns: {:?} at plan: {}",
|
||||
part_cols,
|
||||
plan.display()
|
||||
);
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let LogicalPlan::TableScan(table_scan) = plan {
|
||||
@@ -506,14 +496,39 @@ impl PlanRewriter {
|
||||
// as subset of phy part cols can still be used for certain optimization, and it works as if
|
||||
// those columns are always null
|
||||
// This helps with distinguishing between non-partitioned table and partitioned table with all phy part cols not in logical table
|
||||
partition_cols
|
||||
.push("__OTHER_PHYSICAL_PART_COLS_PLACEHOLDER__".to_string());
|
||||
partition_cols.push(OTHER_PHY_PART_COL_PLACEHOLDER.to_string());
|
||||
}
|
||||
self.partition_cols = Some(partition_cols);
|
||||
self.partition_cols = Some(
|
||||
partition_cols
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
if c == OTHER_PHY_PART_COL_PLACEHOLDER {
|
||||
// for the placeholder, just return an empty alias set
|
||||
return Ok((c.clone(), BTreeSet::new()));
|
||||
}
|
||||
let index =
|
||||
plan.schema().index_of_column_by_name(None, &c).ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(
|
||||
format!(
|
||||
"PlanRewriter: maybe_set_partitions: column {c} not found in schema of plan: {plan}"
|
||||
),
|
||||
)
|
||||
})?;
|
||||
let column = plan.schema().columns().get(index).cloned().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"PlanRewriter: maybe_set_partitions: column index {index} out of bounds in schema of plan: {plan}"
|
||||
))
|
||||
})?;
|
||||
Ok((c.clone(), BTreeSet::from([column])))
|
||||
})
|
||||
.collect::<DfResult<AliasMapping>>()?,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// pop one stack item and reduce the level by 1
|
||||
@@ -539,9 +554,14 @@ impl PlanRewriter {
|
||||
"PlanRewriter: after enforced column requirements with rewriter: {rewriter:?} for node:\n{on_node}"
|
||||
);
|
||||
|
||||
debug!(
|
||||
"PlanRewriter: expand on node: {on_node} with partition col alias mapping: {:?}",
|
||||
self.partition_cols
|
||||
);
|
||||
|
||||
// add merge scan as the new root
|
||||
let mut node = MergeScanLogicalPlan::new(
|
||||
on_node,
|
||||
on_node.clone(),
|
||||
false,
|
||||
// at this stage, the partition cols should be set
|
||||
// treat it as non-partitioned if None
|
||||
@@ -551,6 +571,15 @@ impl PlanRewriter {
|
||||
|
||||
// expand stages
|
||||
for new_stage in self.stage.drain(..) {
|
||||
// tracking alias for merge sort's sort exprs
|
||||
let new_stage = if let LogicalPlan::Extension(ext) = &new_stage
|
||||
&& let Some(merge_sort) = ext.node.as_any().downcast_ref::<MergeSortLogicalPlan>()
|
||||
{
|
||||
// TODO(discord9): change `on_node` to `node` once alias tracking is supported for merge scan
|
||||
rewrite_merge_sort_exprs(merge_sort, &on_node)?
|
||||
} else {
|
||||
new_stage
|
||||
};
|
||||
node = new_stage
|
||||
.with_new_exprs(new_stage.expressions_consider_join(), vec![node.clone()])?;
|
||||
}
|
||||
@@ -592,6 +621,7 @@ struct EnforceDistRequirementRewriter {
|
||||
/// when on `Projection` node, we don't need to apply the column requirements of `Aggregate` node
|
||||
/// because the `Projection` node is not in the scope of the `Aggregate` node
|
||||
cur_level: usize,
|
||||
plan_per_level: BTreeMap<usize, LogicalPlan>,
|
||||
}
|
||||
|
||||
impl EnforceDistRequirementRewriter {
|
||||
@@ -599,8 +629,67 @@ impl EnforceDistRequirementRewriter {
|
||||
Self {
|
||||
column_requirements,
|
||||
cur_level,
|
||||
plan_per_level: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a mapping from (original column, level) to the aliased columns in the current node
/// for all applicable column requirements,
/// i.e. only column requirements with level >= `cur_level` are considered.
|
||||
fn get_current_applicable_column_requirements(
|
||||
&self,
|
||||
node: &LogicalPlan,
|
||||
) -> DfResult<BTreeMap<(Column, usize), BTreeSet<Column>>> {
|
||||
let col_req_per_level = self
|
||||
.column_requirements
|
||||
.iter()
|
||||
.filter(|(_, level)| *level >= self.cur_level)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// track alias for columns and use aliased columns instead
|
||||
// aliased col reqs at current level
|
||||
let mut result_alias_mapping = BTreeMap::new();
|
||||
let Some(child) = node.inputs().first().cloned() else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
for (col_req, level) in col_req_per_level {
|
||||
if let Some(original) = self.plan_per_level.get(level) {
|
||||
// query for alias in current plan
|
||||
let aliased_cols =
|
||||
aliased_columns_for(&col_req.iter().cloned().collect(), node, Some(original))?;
|
||||
for original_col in col_req {
|
||||
let aliased_cols = aliased_cols.get(original_col).cloned();
|
||||
if let Some(cols) = aliased_cols
|
||||
&& !cols.is_empty()
|
||||
{
|
||||
result_alias_mapping.insert((original_col.clone(), *level), cols);
|
||||
} else {
|
||||
// If no aliased column is found in the current node, there should be an alias in the child node,
// as promised by the enforced column requirements (they insert required columns into the child node),
// so we can look up the alias there. If it is still not found, it's an internal error.
|
||||
let aliases_in_child = aliased_columns_for(
|
||||
&[original_col.clone()].into(),
|
||||
child,
|
||||
Some(original),
|
||||
)?;
|
||||
let Some(aliases) = aliases_in_child
|
||||
.get(original_col)
|
||||
.cloned()
|
||||
.filter(|a| !a.is_empty())
|
||||
else {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"EnforceDistRequirementRewriter: no alias found for required column {original_col} in child plan {child} from original plan {original}",
|
||||
)));
|
||||
};
|
||||
|
||||
result_alias_mapping.insert((original_col.clone(), *level), aliases);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result_alias_mapping)
|
||||
}
|
||||
}
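The alias bookkeeping used by both `maybe_set_partitions` and `get_current_applicable_column_requirements` boils down to carrying a column's alias set across each projection. A plain-Rust sketch with hypothetical types (strings instead of DataFusion `Column`s, a rename list instead of projection expressions):

```rust
// Plain-Rust sketch of carrying alias sets across a projection (hypothetical types).
use std::collections::{BTreeMap, BTreeSet};

// Maps an original partition column to every name it is currently visible under.
type AliasMapping = BTreeMap<String, BTreeSet<String>>;

fn apply_projection(mapping: &mut AliasMapping, renames: &[(&str, &str)]) {
    for alias_set in mapping.values_mut() {
        let mut next = BTreeSet::new();
        for alias in alias_set.iter() {
            for (from, to) in renames {
                if alias.as_str() == *from {
                    next.insert((*to).to_string());
                }
            }
        }
        // Aliases the projection does not re-export are dropped from the set.
        *alias_set = next;
    }
}

fn main() {
    let mut mapping: AliasMapping = BTreeMap::new();
    mapping.insert("host".to_string(), BTreeSet::from(["host".to_string()]));

    // `SELECT host AS h, ...` keeps the partition column reachable under a new name.
    apply_projection(&mut mapping, &[("host", "h")]);
    assert_eq!(mapping["host"], BTreeSet::from(["h".to_string()]));
}
```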
|
||||
|
||||
impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
@@ -614,6 +703,7 @@ impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
self.plan_per_level.insert(self.cur_level, node.clone());
|
||||
self.cur_level += 1;
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
@@ -621,38 +711,41 @@ impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
|
||||
self.cur_level -= 1;
|
||||
// first get all applicable column requirements
|
||||
let mut applicable_column_requirements = self
|
||||
.column_requirements
|
||||
.iter()
|
||||
.filter(|(_, level)| *level >= self.cur_level)
|
||||
.map(|(cols, _)| cols.clone())
|
||||
.reduce(|mut acc, cols| {
|
||||
acc.extend(cols);
|
||||
acc
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
debug!(
|
||||
"EnforceDistRequirementRewriter: applicable column requirements at level {} = {:?} for node {}",
|
||||
self.cur_level,
|
||||
applicable_column_requirements,
|
||||
node.display()
|
||||
);
|
||||
|
||||
// make sure all projection applicable scope has the required columns
|
||||
if let LogicalPlan::Projection(ref projection) = node {
|
||||
let mut applicable_column_requirements =
|
||||
self.get_current_applicable_column_requirements(&node)?;
|
||||
|
||||
debug!(
|
||||
"EnforceDistRequirementRewriter: applicable column requirements at level {} = {:?} for node {}",
|
||||
self.cur_level,
|
||||
applicable_column_requirements,
|
||||
node.display()
|
||||
);
|
||||
|
||||
for expr in &projection.expr {
|
||||
let (qualifier, name) = expr.qualified_name();
|
||||
let column = Column::new(qualifier, name);
|
||||
applicable_column_requirements.remove(&column);
|
||||
applicable_column_requirements.retain(|_col_level, alias_set| {
|
||||
// remove all columns that are already in the projection exprs
|
||||
!alias_set.contains(&column)
|
||||
});
|
||||
}
|
||||
if applicable_column_requirements.is_empty() {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
let mut new_exprs = projection.expr.clone();
|
||||
for col in &applicable_column_requirements {
|
||||
new_exprs.push(Expr::Column(col.clone()));
|
||||
for (col, alias_set) in &applicable_column_requirements {
|
||||
// use the first alias in alias set as the column to add
|
||||
new_exprs.push(Expr::Column(alias_set.first().cloned().ok_or_else(
|
||||
|| {
|
||||
datafusion_common::DataFusionError::Internal(
|
||||
format!("EnforceDistRequirementRewriter: alias set is empty, for column {col:?} in node {node}"),
|
||||
)
|
||||
},
|
||||
)?));
|
||||
}
|
||||
let new_node =
|
||||
node.with_new_exprs(new_exprs, node.inputs().into_iter().cloned().collect())?;
|
||||
@@ -661,6 +754,9 @@ impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
applicable_column_requirements
|
||||
);
|
||||
|
||||
// update plan for later use
|
||||
self.plan_per_level.insert(self.cur_level, new_node.clone());
|
||||
|
||||
// still need to continue for next projection if applicable
|
||||
return Ok(Transformed::yes(new_node));
|
||||
}
|
||||
@@ -679,7 +775,6 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
self.stage.clear();
|
||||
self.set_unexpanded();
|
||||
self.partition_cols = None;
|
||||
self.alias_tracker = None;
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
|
||||
@@ -700,9 +795,7 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
self.maybe_set_partitions(&node);
|
||||
|
||||
self.maybe_update_alias(&node);
|
||||
self.maybe_set_partitions(&node)?;
|
||||
|
||||
let Some(parent) = self.get_parent() else {
|
||||
debug!("Plan Rewriter: expand now for no parent found for node: {node}");
|
||||
@@ -721,8 +814,7 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
|
||||
let parent = parent.clone();
|
||||
|
||||
// TODO(ruihang): avoid this clone
|
||||
if self.should_expand(&parent) {
|
||||
if self.should_expand(&parent)? {
|
||||
// TODO(ruihang): does this work for nodes with multiple children?;
|
||||
debug!(
|
||||
"PlanRewriter: should expand child:\n {node}\n Of Parent: {}",
|
||||
|
||||
@@ -17,13 +17,17 @@
|
||||
//! This is a temporary solution, and will be removed once we have a more robust plan rewriter
|
||||
//!
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
|
||||
use datafusion_common::Result as DfResult;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::dist_plan::analyzer::{AliasMapping, OTHER_PHY_PART_COL_PLACEHOLDER};
|
||||
use crate::dist_plan::MergeScanLogicalPlan;
|
||||
|
||||
/// FallbackPlanRewriter is a plan rewriter that will only push down table scan node
|
||||
@@ -38,9 +42,9 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
|
||||
fn f_down(
|
||||
&mut self,
|
||||
node: Self::Node,
|
||||
) -> datafusion_common::Result<datafusion_common::tree_node::Transformed<Self::Node>> {
|
||||
if let LogicalPlan::TableScan(table_scan) = &node {
|
||||
plan: Self::Node,
|
||||
) -> DfResult<datafusion_common::tree_node::Transformed<Self::Node>> {
|
||||
if let LogicalPlan::TableScan(table_scan) = &plan {
|
||||
let partition_cols = if let Some(source) = table_scan
|
||||
.source
|
||||
.as_any()
|
||||
@@ -63,7 +67,29 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
"FallbackPlanRewriter: table {} has partition columns: {:?}",
|
||||
info.name, partition_cols
|
||||
);
|
||||
Some(partition_cols)
|
||||
Some(partition_cols
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
if c == OTHER_PHY_PART_COL_PLACEHOLDER {
|
||||
// for the placeholder, just return an empty alias set
|
||||
return Ok((c.clone(), BTreeSet::new()));
|
||||
}
|
||||
let index =
|
||||
plan.schema().index_of_column_by_name(None, &c).ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(
|
||||
format!(
|
||||
"PlanRewriter: maybe_set_partitions: column {c} not found in schema of plan: {plan}"
|
||||
),
|
||||
)
|
||||
})?;
|
||||
let column = plan.schema().columns().get(index).cloned().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"PlanRewriter: maybe_set_partitions: column index {index} out of bounds in schema of plan: {plan}"
|
||||
))
|
||||
})?;
|
||||
Ok((c.clone(), BTreeSet::from([column])))
|
||||
})
|
||||
.collect::<DfResult<AliasMapping>>()?)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -74,7 +100,7 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
None
|
||||
};
|
||||
let node = MergeScanLogicalPlan::new(
|
||||
node,
|
||||
plan,
|
||||
false,
|
||||
// at this stage, the partition cols should be set
|
||||
// treat it as non-partitioned if None
|
||||
@@ -83,7 +109,7 @@ impl TreeNodeRewriter for FallbackPlanRewriter {
|
||||
.into_logical_plan();
|
||||
Ok(Transformed::yes(node))
|
||||
} else {
|
||||
Ok(Transformed::no(node))
|
||||
Ok(Transformed::no(plan))
|
||||
}
|
||||
}
|
||||
}
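The hunk above resolves each partition column against the plan schema and stores it as an alias mapping, with the placeholder partition column mapped to an empty set. A minimal, self-contained sketch of that idea (toy types and an assumed placeholder name; the real code goes through DataFusion's schema types and the crate's AliasMapping):

    use std::collections::{BTreeMap, BTreeSet};

    // Assumed placeholder name, for illustration only.
    const OTHER_PHY_PART_COL_PLACEHOLDER: &str = "__other__";

    /// Toy stand-in for AliasMapping: partition column -> set of column names it is known by.
    type AliasMapping = BTreeMap<String, BTreeSet<String>>;

    fn build_alias_mapping(partition_cols: &[&str], schema_cols: &[&str]) -> Result<AliasMapping, String> {
        let mut mapping = AliasMapping::new();
        for &col in partition_cols {
            if col == OTHER_PHY_PART_COL_PLACEHOLDER {
                // The placeholder has no concrete column in the schema: keep an empty alias set.
                mapping.insert(col.to_string(), BTreeSet::new());
                continue;
            }
            // Any real partition column must exist in the plan schema, otherwise it is an error.
            if !schema_cols.contains(&col) {
                return Err(format!("partition column {col} not found in schema"));
            }
            mapping.insert(col.to_string(), BTreeSet::from([col.to_string()]));
        }
        Ok(mapping)
    }

    fn main() {
        let mapping = build_alias_mapping(&["host", "__other__"], &["host", "ts", "value"]).unwrap();
        assert_eq!(mapping["host"].len(), 1);
        assert!(mapping["__other__"].is_empty());
    }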
|
||||
|
||||
@@ -15,17 +15,23 @@
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::IntervalDayTime;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_function::aggrs::aggr_wrapper::{StateMergeHelper, StateWrapper};
|
||||
use common_recordbatch::adapter::RecordBatchMetrics;
|
||||
use common_recordbatch::error::Result as RecordBatchResult;
|
||||
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::SessionState;
|
||||
use datafusion::functions_aggregate::expr_fn::avg;
|
||||
use datafusion::functions_aggregate::min_max::{max, min};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_common::JoinType;
|
||||
use datafusion_expr::{col, lit, Expr, LogicalPlanBuilder};
|
||||
use datafusion_expr::expr::ScalarFunction;
|
||||
use datafusion_expr::{col, lit, AggregateUDF, Expr, LogicalPlanBuilder};
|
||||
use datafusion_functions::datetime::date_bin;
|
||||
use datafusion_sql::TableReference;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
|
||||
@@ -152,11 +158,30 @@ impl Stream for EmptyStream {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_encode_decode_substrait(plan: &LogicalPlan, state: SessionState) {
|
||||
let sub_plan_bytes = substrait::DFLogicalSubstraitConvertor
|
||||
.encode(plan, crate::query_engine::DefaultSerializer)
|
||||
.unwrap();
|
||||
let inner = sub_plan_bytes.clone();
|
||||
let decoded_plan = futures::executor::block_on(async move {
|
||||
substrait::DFLogicalSubstraitConvertor
|
||||
.decode(inner, state)
|
||||
.await
|
||||
}).inspect_err(|e|{
|
||||
use prost::Message;
|
||||
let sub_plan = substrait::substrait_proto_df::proto::Plan::decode(sub_plan_bytes).unwrap();
|
||||
common_telemetry::error!("Failed to decode substrait plan: {e},substrait plan: {sub_plan:#?}\nlogical plan: {plan:#?}");
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(*plan, decoded_plan);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expand_proj_sort_proj() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -199,11 +224,58 @@ fn expand_proj_sort_proj() {
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expand_proj_sort_partial_proj() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.project(vec![col("number"), col("pk1"), col("pk2"), col("pk3")])
|
||||
.unwrap()
|
||||
.project(vec![
|
||||
col("number"),
|
||||
col("pk1"),
|
||||
col("pk3"),
|
||||
col("pk1").eq(col("pk2")),
|
||||
])
|
||||
.unwrap()
|
||||
.sort(vec![col("t.pk1 = t.pk2").sort(true, true)])
|
||||
.unwrap()
|
||||
.project(vec![col("number"), col("t.pk1 = t.pk2").alias("eq_sorted")])
|
||||
.unwrap()
|
||||
.project(vec![col("number")])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
|
||||
let expected = [
|
||||
"Projection: t.number",
|
||||
" MergeSort: eq_sorted ASC NULLS FIRST", // notice how `eq_sorted` is used here
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Projection: t.number, eq_sorted", // notice how `eq_sorted` is added not `t.pk1 = t.pk2`
|
||||
" Projection: t.number, t.pk1 = t.pk2 AS eq_sorted",
|
||||
" Sort: t.pk1 = t.pk2 ASC NULLS FIRST",
|
||||
" Projection: t.number, t.pk1, t.pk3, t.pk1 = t.pk2",
|
||||
" Projection: t.number, t.pk1, t.pk2, t.pk3", // notice this projection doesn't add `t.pk1 = t.pk2` column requirement
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expand_sort_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -233,11 +305,13 @@ fn expand_sort_limit() {
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
/// Test that merge sort can apply the enforce-dist-requirement columns correctly and use the aliased column
/// correctly: since there is an aliased sort column, there is no need to add a duplicate sort column under its original column name
|
||||
#[test]
|
||||
fn expand_sort_alias_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -258,10 +332,10 @@ fn expand_sort_alias_limit() {
|
||||
let expected = [
|
||||
"Projection: something",
|
||||
" Limit: skip=0, fetch=10",
|
||||
" MergeSort: t.pk1 ASC NULLS LAST",
|
||||
" MergeSort: something ASC NULLS LAST",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Limit: skip=0, fetch=10",
|
||||
" Projection: t.pk1 AS something, t.pk1",
|
||||
" Projection: t.pk1 AS something",
|
||||
" Sort: t.pk1 ASC NULLS LAST",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
@@ -276,7 +350,7 @@ fn expand_sort_alias_limit() {
|
||||
fn expand_sort_alias_conflict_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -318,7 +392,7 @@ fn expand_sort_alias_conflict_limit() {
|
||||
fn expand_sort_alias_conflict_but_not_really_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -358,7 +432,7 @@ fn expand_sort_alias_conflict_but_not_really_limit() {
|
||||
fn expand_limit_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -391,7 +465,7 @@ fn expand_limit_sort() {
|
||||
fn expand_sort_limit_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -438,7 +512,7 @@ fn expand_sort_limit_sort() {
|
||||
fn expand_proj_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -473,7 +547,7 @@ fn expand_proj_step_aggr() {
|
||||
fn expand_proj_alias_fake_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -517,7 +591,7 @@ fn expand_proj_alias_fake_part_col_aggr() {
|
||||
fn expand_proj_alias_aliased_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -563,7 +637,7 @@ fn expand_proj_alias_aliased_part_col_aggr() {
|
||||
fn expand_part_col_aggr_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -596,7 +670,7 @@ fn expand_part_col_aggr_step_aggr() {
|
||||
fn expand_step_aggr_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -629,7 +703,7 @@ fn expand_step_aggr_step_aggr() {
|
||||
fn expand_part_col_aggr_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -673,7 +747,7 @@ fn expand_part_col_aggr_part_col_aggr() {
|
||||
fn expand_step_aggr_proj() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -709,7 +783,7 @@ fn expand_step_aggr_proj() {
|
||||
fn expand_proj_sort_step_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -750,7 +824,7 @@ fn expand_proj_sort_step_aggr_limit() {
|
||||
fn expand_proj_sort_limit_step_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -792,7 +866,7 @@ fn expand_proj_sort_limit_step_aggr() {
|
||||
fn expand_proj_limit_step_aggr_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -833,7 +907,7 @@ fn expand_proj_limit_step_aggr_sort() {
|
||||
fn expand_proj_sort_part_col_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -875,7 +949,7 @@ fn expand_proj_sort_part_col_aggr_limit() {
|
||||
fn expand_proj_sort_limit_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -917,7 +991,7 @@ fn expand_proj_sort_limit_part_col_aggr() {
|
||||
fn expand_proj_part_col_aggr_limit_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -959,7 +1033,7 @@ fn expand_proj_part_col_aggr_limit_sort() {
|
||||
fn expand_proj_part_col_aggr_sort_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1002,7 +1076,7 @@ fn expand_proj_part_col_aggr_sort_limit() {
|
||||
fn expand_proj_limit_part_col_aggr_sort() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1044,7 +1118,7 @@ fn expand_proj_limit_part_col_aggr_sort() {
|
||||
fn expand_proj_limit_sort_part_col_aggr() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1087,7 +1161,7 @@ fn expand_proj_limit_sort_part_col_aggr() {
|
||||
fn expand_step_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1120,7 +1194,7 @@ fn expand_step_aggr_limit() {
|
||||
fn expand_step_aggr_avg_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1153,7 +1227,7 @@ fn expand_step_aggr_avg_limit() {
|
||||
fn expand_part_col_aggr_limit() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
@@ -1332,10 +1406,224 @@ fn transform_unalighed_join_with_alias() {
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"TableScan: t",
|
||||
"]]",
|
||||
" SubqueryAlias: right",
|
||||
" Projection: t.number",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"TableScan: t",
|
||||
" Projection: right.number",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"SubqueryAlias: right",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_subquery_sort_alias() {
|
||||
init_default_ut_logging();
|
||||
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.alias("a")
|
||||
.unwrap()
|
||||
.sort(vec![col("a.number").sort(true, false)])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = [
|
||||
"Projection: a.pk1, a.pk2, a.pk3, a.ts, a.number",
|
||||
" MergeSort: a.number ASC NULLS LAST",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Sort: a.number ASC NULLS LAST",
|
||||
" SubqueryAlias: a",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_sort_subquery_alias() {
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.sort(vec![col("t.number").sort(true, false)])
|
||||
.unwrap()
|
||||
.alias("a")
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = [
|
||||
"Projection: a.pk1, a.pk2, a.pk3, a.ts, a.number",
|
||||
" MergeSort: a.number ASC NULLS LAST",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"SubqueryAlias: a",
|
||||
" Sort: t.number ASC NULLS LAST",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_bin_ts_group_by() {
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
let date_bin_call = Expr::ScalarFunction(ScalarFunction::new_udf(
|
||||
date_bin(),
|
||||
vec![
|
||||
lit(datafusion_common::ScalarValue::IntervalDayTime(Some(
|
||||
IntervalDayTime::new(0, 60 * 1000), // 1 minute in millis
|
||||
))),
|
||||
col("ts"),
|
||||
],
|
||||
));
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(vec![date_bin_call], vec![min(col("number"))])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
|
||||
let expected = [
|
||||
r#"Projection: date_bin(IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 60000 }"),t.ts), min(t.number)"#,
|
||||
r#" Aggregate: groupBy=[[date_bin(IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 60000 }"),t.ts)]], aggr=[[__min_merge(__min_state(t.number)) AS min(t.number)]]"#,
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
r#"Aggregate: groupBy=[[date_bin(IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 60000 }"), t.ts)]], aggr=[[__min_state(t.number)]]"#,
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_last_value_order_by() {
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_provider = Arc::new(DfTableProviderAdapter::new(test_table));
|
||||
let table_source = Arc::new(DefaultTableSource::new(table_provider.clone() as _));
|
||||
let ctx = SessionContext::new();
|
||||
ctx.register_table(TableReference::bare("t"), table_provider.clone() as _)
|
||||
.unwrap();
|
||||
ctx.register_udaf(AggregateUDF::new_from_impl(
|
||||
StateWrapper::new(
|
||||
datafusion::functions_aggregate::first_last::last_value_udaf()
|
||||
.as_ref()
|
||||
.clone(),
|
||||
)
|
||||
.unwrap(),
|
||||
));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source.clone(), None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(
|
||||
Vec::<Expr>::new(),
|
||||
vec![datafusion::functions_aggregate::first_last::last_value(
|
||||
col("ts"),
|
||||
vec![col("ts").sort(true, true)],
|
||||
)],
|
||||
)
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
try_encode_decode_substrait(&plan, ctx.state());
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}
|
||||
.analyze(plan.clone(), &config)
|
||||
.unwrap();
|
||||
|
||||
let expected = [
|
||||
"Projection: last_value(t.ts) ORDER BY [t.ts ASC NULLS FIRST]",
|
||||
" Aggregate: groupBy=[[]], aggr=[[__last_value_merge(__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS FIRST]) AS last_value(t.ts) ORDER BY [t.ts ASC NULLS FIRST]]]",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Aggregate: groupBy=[[]], aggr=[[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS FIRST]]]",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
|
||||
let LogicalPlan::Aggregate(aggr_plan) = plan else {
|
||||
panic!("expect Aggregate plan");
|
||||
};
|
||||
let split = StateMergeHelper::split_aggr_node(aggr_plan).unwrap();
|
||||
|
||||
try_encode_decode_substrait(&split.lower_state, ctx.state());
|
||||
}
|
||||
|
||||
/// try remove the order by to see if it still works
|
||||
#[test]
|
||||
fn test_last_value_no_order_by() {
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_provider = Arc::new(DfTableProviderAdapter::new(test_table));
|
||||
let table_source = Arc::new(DefaultTableSource::new(table_provider.clone() as _));
|
||||
let ctx = SessionContext::new();
|
||||
ctx.register_table(TableReference::bare("t"), table_provider.clone() as _)
|
||||
.unwrap();
|
||||
ctx.register_udaf(AggregateUDF::new_from_impl(
|
||||
StateWrapper::new(
|
||||
datafusion::functions_aggregate::first_last::last_value_udaf()
|
||||
.as_ref()
|
||||
.clone(),
|
||||
)
|
||||
.unwrap(),
|
||||
));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(
|
||||
Vec::<Expr>::new(),
|
||||
vec![datafusion::functions_aggregate::first_last::last_value(
|
||||
col("ts"),
|
||||
vec![],
|
||||
)],
|
||||
)
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let LogicalPlan::Aggregate(aggr_plan) = plan.clone() else {
|
||||
panic!("expect Aggregate plan");
|
||||
};
|
||||
let split = StateMergeHelper::split_aggr_node(aggr_plan).unwrap();
|
||||
|
||||
try_encode_decode_substrait(&split.lower_state, ctx.state());
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}
|
||||
.analyze(plan.clone(), &config)
|
||||
.unwrap();
|
||||
|
||||
let expected = [
|
||||
"Projection: last_value(t.ts)",
|
||||
" Aggregate: groupBy=[[]], aggr=[[__last_value_merge(__last_value_state(t.ts)) AS last_value(t.ts)]]",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
"Aggregate: groupBy=[[]], aggr=[[__last_value_state(t.ts)]]",
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
File diff suppressed because it is too large
@@ -15,9 +15,9 @@
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_function::aggrs::aggr_wrapper::{aggr_state_func_name, StateMergeHelper};
|
||||
use common_function::function_registry::FUNCTION_REGISTRY;
|
||||
use common_function::aggrs::aggr_wrapper::{is_all_aggr_exprs_steppable, StateMergeHelper};
|
||||
use common_telemetry::debug;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
|
||||
use promql::extension_plan::{
|
||||
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
|
||||
@@ -71,38 +71,6 @@ pub fn step_aggr_to_upper_aggr(
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
/// Check if the given aggregate expression is steppable.
|
||||
/// As in if it can be split into multiple steps:
|
||||
/// i.e. on datanode first call `state(input)` then
|
||||
/// on frontend call `calc(merge(state))` to get the final result.
|
||||
pub fn is_all_aggr_exprs_steppable(aggr_exprs: &[Expr]) -> bool {
|
||||
aggr_exprs.iter().all(|expr| {
|
||||
if let Some(aggr_func) = get_aggr_func(expr) {
|
||||
if aggr_func.params.distinct {
|
||||
// Distinct aggregate functions are not steppable(yet).
|
||||
return false;
|
||||
}
|
||||
|
||||
// whether the corresponding state function exists in the registry
|
||||
FUNCTION_REGISTRY.is_aggr_func_exist(&aggr_state_func_name(aggr_func.func.name()))
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
}
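For intuition, the state/merge split described in the doc comment above can be sketched in plain Rust, using avg as the example (toy types only, not the crate's StateWrapper / StateMergeHelper API): each datanode computes a partial (sum, count) state, and the frontend merges the partial states and finalizes the result.

    /// Partial avg state computed on each datanode.
    #[derive(Clone, Copy)]
    struct AvgState {
        sum: f64,
        count: u64,
    }

    /// Datanode side: `state(input)`.
    fn avg_state(values: &[f64]) -> AvgState {
        AvgState {
            sum: values.iter().sum(),
            count: values.len() as u64,
        }
    }

    /// Frontend side: `calc(merge(state))`.
    fn avg_merge(states: &[AvgState]) -> Option<f64> {
        let (sum, count) = states
            .iter()
            .fold((0.0_f64, 0_u64), |(s, c), st| (s + st.sum, c + st.count));
        (count > 0).then(|| sum / count as f64)
    }

    fn main() {
        // Each "datanode" only sees its local rows.
        let states = [avg_state(&[1.0, 2.0]), avg_state(&[3.0])];
        assert_eq!(avg_merge(&states), Some(2.0));
    }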
|
||||
|
||||
pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
|
||||
let mut expr_ref = expr;
|
||||
while let Expr::Alias(alias) = expr_ref {
|
||||
expr_ref = &alias.expr;
|
||||
}
|
||||
if let Expr::AggregateFunction(aggr_func) = expr_ref {
|
||||
Some(aggr_func)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub enum Commutativity {
|
||||
Commutative,
|
||||
@@ -121,15 +89,18 @@ pub enum Commutativity {
|
||||
pub struct Categorizer {}
|
||||
|
||||
impl Categorizer {
|
||||
pub fn check_plan(plan: &LogicalPlan, partition_cols: Option<AliasMapping>) -> Commutativity {
|
||||
pub fn check_plan(
|
||||
plan: &LogicalPlan,
|
||||
partition_cols: Option<AliasMapping>,
|
||||
) -> DfResult<Commutativity> {
|
||||
let partition_cols = partition_cols.unwrap_or_default();
|
||||
|
||||
match plan {
|
||||
let comm = match plan {
|
||||
LogicalPlan::Projection(proj) => {
|
||||
for expr in &proj.expr {
|
||||
let commutativity = Self::check_expr(expr);
|
||||
if !matches!(commutativity, Commutativity::Commutative) {
|
||||
return commutativity;
|
||||
return Ok(commutativity);
|
||||
}
|
||||
}
|
||||
Commutativity::Commutative
|
||||
@@ -142,24 +113,27 @@ impl Categorizer {
|
||||
let matches_partition = Self::check_partition(&aggr.group_expr, &partition_cols);
|
||||
if !matches_partition && is_all_steppable {
|
||||
debug!("Plan is steppable: {plan}");
|
||||
return Commutativity::TransformedCommutative {
|
||||
return Ok(Commutativity::TransformedCommutative {
|
||||
transformer: Some(Arc::new(|plan: &LogicalPlan| {
|
||||
debug!("Before Step optimize: {plan}");
|
||||
let ret = step_aggr_to_upper_aggr(plan);
|
||||
ret.ok().map(|s| TransformerAction {
|
||||
ret.inspect_err(|err| {
|
||||
common_telemetry::error!("Failed to step aggregate plan: {err:?}");
|
||||
})
|
||||
.map(|s| TransformerAction {
|
||||
extra_parent_plans: s.extra_parent_plans,
|
||||
new_child_plan: s.new_child_plan,
|
||||
})
|
||||
})),
|
||||
};
|
||||
});
|
||||
}
|
||||
if !matches_partition {
|
||||
return Commutativity::NonCommutative;
|
||||
return Ok(Commutativity::NonCommutative);
|
||||
}
|
||||
for expr in &aggr.aggr_expr {
|
||||
let commutativity = Self::check_expr(expr);
|
||||
if !matches!(commutativity, Commutativity::Commutative) {
|
||||
return commutativity;
|
||||
return Ok(commutativity);
|
||||
}
|
||||
}
|
||||
// all group by expressions are partition columns can push down, unless
|
||||
@@ -170,7 +144,7 @@ impl Categorizer {
|
||||
}
|
||||
LogicalPlan::Sort(_) => {
|
||||
if partition_cols.is_empty() {
|
||||
return Commutativity::Commutative;
|
||||
return Ok(Commutativity::Commutative);
|
||||
}
|
||||
|
||||
// sort plan needs to consider column priority
|
||||
@@ -187,7 +161,7 @@ impl Categorizer {
|
||||
LogicalPlan::TableScan(_) => Commutativity::Commutative,
|
||||
LogicalPlan::EmptyRelation(_) => Commutativity::NonCommutative,
|
||||
LogicalPlan::Subquery(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::SubqueryAlias(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::SubqueryAlias(_) => Commutativity::Commutative,
|
||||
LogicalPlan::Limit(limit) => {
|
||||
// Only execute `fetch` on remote nodes.
|
||||
// wait for https://github.com/apache/arrow-datafusion/pull/7669
|
||||
@@ -219,7 +193,9 @@ impl Categorizer {
|
||||
LogicalPlan::Ddl(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Copy(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::RecursiveQuery(_) => Commutativity::Unsupported,
|
||||
}
|
||||
};
|
||||
|
||||
Ok(comm)
|
||||
}
|
||||
|
||||
pub fn check_extension_plan(
|
||||
@@ -302,6 +278,10 @@ impl Categorizer {
|
||||
|
||||
/// Return true if the given expr and partition cols satisfied the rule.
|
||||
/// In this case the plan can be treated as fully commutative.
|
||||
///
|
||||
/// So only if all partition columns show up in `exprs`, return true.
|
||||
/// Otherwise return false.
|
||||
///
|
||||
fn check_partition(exprs: &[Expr], partition_cols: &AliasMapping) -> bool {
|
||||
let mut ref_cols = HashSet::new();
|
||||
for expr in exprs {
|
||||
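The rule stated in the doc comment above, that the plan is only fully commutative when every partition column shows up in the expressions, reduces to a small standalone check (toy types; the real code walks the Expr trees and an AliasMapping that carries aliases per partition column):

    use std::collections::{HashMap, HashSet};

    /// Toy alias mapping: partition column -> alias names it may appear under in this plan.
    type AliasMapping = HashMap<String, HashSet<String>>;

    /// True only if every partition column shows up, under some alias, in the referenced columns.
    fn check_partition(referenced_cols: &HashSet<String>, partition_cols: &AliasMapping) -> bool {
        partition_cols
            .values()
            .all(|aliases| aliases.iter().any(|alias| referenced_cols.contains(alias)))
    }

    fn main() {
        let referenced: HashSet<String> = ["host_alias", "ts"].iter().map(|s| s.to_string()).collect();

        let mut partition_cols = AliasMapping::new();
        partition_cols.insert("host".into(), HashSet::from(["host_alias".to_string()]));
        assert!(check_partition(&referenced, &partition_cols));

        partition_cols.insert("dc".into(), HashSet::from(["dc".to_string()]));
        assert!(!check_partition(&referenced, &partition_cols)); // "dc" is never referenced
    }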
@@ -330,7 +310,7 @@ impl Categorizer {
|
||||
pub type Transformer = Arc<dyn Fn(&LogicalPlan) -> Option<LogicalPlan>>;
|
||||
|
||||
/// Returns transformer action that need to be applied
|
||||
pub type StageTransformer = Arc<dyn Fn(&LogicalPlan) -> Option<TransformerAction>>;
|
||||
pub type StageTransformer = Arc<dyn Fn(&LogicalPlan) -> DfResult<TransformerAction>>;
|
||||
|
||||
/// The Action that a transformer should take on the plan.
|
||||
pub struct TransformerAction {
|
||||
@@ -365,7 +345,7 @@ mod test {
|
||||
fetch: None,
|
||||
});
|
||||
assert!(matches!(
|
||||
Categorizer::check_plan(&plan, Some(Default::default())),
|
||||
Categorizer::check_plan(&plan, Some(Default::default())).unwrap(),
|
||||
Commutativity::Commutative
|
||||
));
|
||||
}
|
||||
|
||||
@@ -52,6 +52,7 @@ use store_api::storage::RegionId;
|
||||
use table::table_name::TableName;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::dist_plan::analyzer::AliasMapping;
|
||||
use crate::error::ConvertSchemaSnafu;
|
||||
use crate::metrics::{MERGE_SCAN_ERRORS_TOTAL, MERGE_SCAN_POLL_ELAPSED, MERGE_SCAN_REGIONS};
|
||||
use crate::region_query::RegionQueryHandlerRef;
|
||||
@@ -62,7 +63,7 @@ pub struct MergeScanLogicalPlan {
|
||||
input: LogicalPlan,
|
||||
/// If this plan is a placeholder
|
||||
is_placeholder: bool,
|
||||
partition_cols: Vec<String>,
|
||||
partition_cols: AliasMapping,
|
||||
}
|
||||
|
||||
impl UserDefinedLogicalNodeCore for MergeScanLogicalPlan {
|
||||
@@ -103,7 +104,7 @@ impl UserDefinedLogicalNodeCore for MergeScanLogicalPlan {
|
||||
}
|
||||
|
||||
impl MergeScanLogicalPlan {
|
||||
pub fn new(input: LogicalPlan, is_placeholder: bool, partition_cols: Vec<String>) -> Self {
|
||||
pub fn new(input: LogicalPlan, is_placeholder: bool, partition_cols: AliasMapping) -> Self {
|
||||
Self {
|
||||
input,
|
||||
is_placeholder,
|
||||
@@ -130,7 +131,7 @@ impl MergeScanLogicalPlan {
|
||||
&self.input
|
||||
}
|
||||
|
||||
pub fn partition_cols(&self) -> &[String] {
|
||||
pub fn partition_cols(&self) -> &AliasMapping {
|
||||
&self.partition_cols
|
||||
}
|
||||
}
|
||||
@@ -150,7 +151,7 @@ pub struct MergeScanExec {
|
||||
partition_metrics: Arc<Mutex<HashMap<usize, PartitionMetrics>>>,
|
||||
query_ctx: QueryContextRef,
|
||||
target_partition: usize,
|
||||
partition_cols: Vec<String>,
|
||||
partition_cols: AliasMapping,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for MergeScanExec {
|
||||
@@ -175,7 +176,7 @@ impl MergeScanExec {
|
||||
region_query_handler: RegionQueryHandlerRef,
|
||||
query_ctx: QueryContextRef,
|
||||
target_partition: usize,
|
||||
partition_cols: Vec<String>,
|
||||
partition_cols: AliasMapping,
|
||||
) -> Result<Self> {
|
||||
// TODO(CookiePieWw): Initially we removed the metadata from the schema in #2000, but we have to
|
||||
// keep it for #4619 to identify json type in src/datatypes/src/schema/column_schema.rs.
|
||||
@@ -215,12 +216,18 @@ impl MergeScanExec {
|
||||
let partition_exprs = partition_cols
|
||||
.iter()
|
||||
.filter_map(|col| {
|
||||
session_state
|
||||
.create_physical_expr(
|
||||
Expr::Column(ColumnExpr::new_unqualified(col)),
|
||||
plan.schema(),
|
||||
)
|
||||
.ok()
|
||||
if let Some(first_alias) = col.1.first() {
|
||||
session_state
|
||||
.create_physical_expr(
|
||||
Expr::Column(ColumnExpr::new_unqualified(
|
||||
first_alias.name().to_string(),
|
||||
)),
|
||||
plan.schema(),
|
||||
)
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let partitioning = Partitioning::Hash(partition_exprs, target_partition);
|
||||
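The hunk above switches the hash-partitioning expressions from plain column names to the alias mapping: for every partition column the first known alias is used, and columns with no resolved alias are skipped. A minimal sketch of that selection step (toy types; the real code builds DataFusion physical column expressions via create_physical_expr):

    use std::collections::{BTreeMap, BTreeSet};

    /// Toy alias mapping: partition column -> ordered set of alias names.
    type AliasMapping = BTreeMap<String, BTreeSet<String>>;

    /// Pick one column name per partition column to hash on, skipping unresolved ones.
    fn hash_columns(partition_cols: &AliasMapping) -> Vec<String> {
        partition_cols
            .values()
            .filter_map(|aliases| aliases.iter().next().cloned())
            .collect()
    }

    fn main() {
        let mut mapping = AliasMapping::new();
        mapping.insert("host".into(), BTreeSet::from(["h".to_string()]));
        mapping.insert("__other__".into(), BTreeSet::new()); // placeholder: no alias, skipped
        assert_eq!(hash_columns(&mapping), vec!["h".to_string()]);
    }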
@@ -420,17 +427,22 @@ impl MergeScanExec {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut hash_cols = HashSet::default();
|
||||
let all_partition_col_aliases: HashSet<_> = self
|
||||
.partition_cols
|
||||
.values()
|
||||
.flat_map(|aliases| aliases.iter().map(|c| c.name()))
|
||||
.collect();
|
||||
let mut overlaps = vec![];
|
||||
for expr in &hash_exprs {
|
||||
if let Some(col_expr) = expr.as_any().downcast_ref::<Column>() {
|
||||
hash_cols.insert(col_expr.name());
|
||||
if let Some(col_expr) = expr.as_any().downcast_ref::<Column>()
|
||||
&& all_partition_col_aliases.contains(col_expr.name())
|
||||
{
|
||||
overlaps.push(expr.clone());
|
||||
}
|
||||
}
|
||||
for col in &self.partition_cols {
|
||||
if !hash_cols.contains(col.as_str()) {
|
||||
// The partitioning columns are not the same
|
||||
return None;
|
||||
}
|
||||
|
||||
if overlaps.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Self {
|
||||
@@ -443,7 +455,7 @@ impl MergeScanExec {
|
||||
metric: self.metric.clone(),
|
||||
properties: PlanProperties::new(
|
||||
self.properties.eq_properties.clone(),
|
||||
Partitioning::Hash(hash_exprs, self.target_partition),
|
||||
Partitioning::Hash(overlaps, self.target_partition),
|
||||
self.properties.emission_type,
|
||||
self.properties.boundedness,
|
||||
),
|
||||
|
||||
@@ -177,7 +177,7 @@ impl ExtensionPlanner for DistExtensionPlanner {
|
||||
self.region_query_handler.clone(),
|
||||
query_ctx,
|
||||
session_state.config().target_partitions(),
|
||||
merge_scan.partition_cols().to_vec(),
|
||||
merge_scan.partition_cols().clone(),
|
||||
)?;
|
||||
Ok(Some(Arc::new(merge_scan_plan) as _))
|
||||
}
|
||||
|
||||
@@ -88,6 +88,10 @@ impl CountWildcardToTimeIndexRule {
|
||||
// check if the time index is a valid column as for current plan
|
||||
if let Some(col) = &col {
|
||||
let mut is_valid = false;
|
||||
// if more than one input, we give up and just use `count(1)`
|
||||
if plan.inputs().len() > 1 {
|
||||
return None;
|
||||
}
|
||||
for input in plan.inputs() {
|
||||
if input.schema().has_column(col) {
|
||||
is_valid = true;
|
||||
@@ -171,6 +175,11 @@ impl TreeNodeVisitor<'_> for TimeIndexFinder {
|
||||
}
|
||||
}
|
||||
|
||||
if node.inputs().len() > 1 {
|
||||
// if more than one input, we give up and just use `count(1)`
|
||||
return Ok(TreeNodeRecursion::Stop);
|
||||
}
|
||||
|
||||
Ok(TreeNodeRecursion::Continue)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ use std::sync::Arc;
|
||||
use datafusion::config::ConfigOptions;
|
||||
use datafusion::physical_optimizer::PhysicalOptimizerRule;
|
||||
use datafusion::physical_plan::ExecutionPlan;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode};
|
||||
use datafusion_common::Result as DfResult;
|
||||
use datafusion_physical_expr::Distribution;
|
||||
|
||||
@@ -56,26 +55,52 @@ impl PassDistribution {
|
||||
plan: Arc<dyn ExecutionPlan>,
|
||||
_config: &ConfigOptions,
|
||||
) -> DfResult<Arc<dyn ExecutionPlan>> {
|
||||
let mut distribution_requirement = None;
|
||||
let result = plan.transform_down(|plan| {
|
||||
if let Some(distribution) = plan.required_input_distribution().first()
|
||||
&& !matches!(distribution, Distribution::UnspecifiedDistribution)
|
||||
// incorrect workaround, doesn't fix the actual issue
|
||||
&& plan.name() != "HashJoinExec"
|
||||
{
|
||||
distribution_requirement = Some(distribution.clone());
|
||||
}
|
||||
// Start from root with no requirement
|
||||
Self::rewrite_with_distribution(plan, None)
|
||||
}
|
||||
|
||||
if let Some(merge_scan) = plan.as_any().downcast_ref::<MergeScanExec>()
|
||||
&& let Some(distribution) = distribution_requirement.as_ref()
|
||||
&& let Some(new_plan) = merge_scan.try_with_new_distribution(distribution.clone())
|
||||
{
|
||||
Ok(Transformed::yes(Arc::new(new_plan) as _))
|
||||
} else {
|
||||
Ok(Transformed::no(plan))
|
||||
}
|
||||
})?;
|
||||
/// Top-down rewrite that propagates distribution requirements to children.
|
||||
fn rewrite_with_distribution(
|
||||
plan: Arc<dyn ExecutionPlan>,
|
||||
current_req: Option<Distribution>,
|
||||
) -> DfResult<Arc<dyn ExecutionPlan>> {
|
||||
// If this is a MergeScanExec, try to apply the current requirement.
|
||||
if let Some(merge_scan) = plan.as_any().downcast_ref::<MergeScanExec>()
|
||||
&& let Some(distribution) = current_req.as_ref()
|
||||
&& let Some(new_plan) = merge_scan.try_with_new_distribution(distribution.clone())
|
||||
{
|
||||
// Leaf node; no children to process
|
||||
return Ok(Arc::new(new_plan) as _);
|
||||
}
|
||||
|
||||
Ok(result.data)
|
||||
// Compute per-child requirements from the current node.
|
||||
let children = plan.children();
|
||||
if children.is_empty() {
|
||||
return Ok(plan);
|
||||
}
|
||||
|
||||
let required = plan.required_input_distribution();
|
||||
let mut new_children = Vec::with_capacity(children.len());
|
||||
for (idx, child) in children.into_iter().enumerate() {
|
||||
let child_req = match required.get(idx) {
|
||||
Some(Distribution::UnspecifiedDistribution) => None,
|
||||
None => current_req.clone(),
|
||||
Some(req) => Some(req.clone()),
|
||||
};
|
||||
let new_child = Self::rewrite_with_distribution(child.clone(), child_req)?;
|
||||
new_children.push(new_child);
|
||||
}
|
||||
|
||||
// Rebuild the node only if any child changed (pointer inequality)
|
||||
let unchanged = plan
|
||||
.children()
|
||||
.into_iter()
|
||||
.zip(new_children.iter())
|
||||
.all(|(old, new)| Arc::ptr_eq(old, new));
|
||||
if unchanged {
|
||||
Ok(plan)
|
||||
} else {
|
||||
plan.with_new_children(new_children)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::collections::HashSet;
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use arrow_schema::SortOptions;
|
||||
use common_function::aggrs::aggr_wrapper::aggr_state_func_name;
|
||||
use common_recordbatch::OrderOption;
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor};
|
||||
@@ -217,7 +218,8 @@ impl TreeNodeVisitor<'_> for ScanHintVisitor {
|
||||
is_all_last_value = false;
|
||||
break;
|
||||
};
|
||||
if func.func.name() != "last_value"
|
||||
if (func.func.name() != "last_value"
|
||||
&& func.func.name() != aggr_state_func_name("last_value"))
|
||||
|| func.params.filter.is_some()
|
||||
|| func.params.distinct
|
||||
{
|
||||
|
||||
@@ -282,6 +282,16 @@ impl DfLogicalPlanner {
|
||||
.build()
|
||||
.context(PlanSqlSnafu)?;
|
||||
}
|
||||
|
||||
// Wrap in SubqueryAlias to ensure proper table qualification for CTE
|
||||
logical_plan = LogicalPlan::SubqueryAlias(
|
||||
datafusion_expr::SubqueryAlias::try_new(
|
||||
Arc::new(logical_plan),
|
||||
cte.name.value.clone(),
|
||||
)
|
||||
.context(PlanSqlSnafu)?,
|
||||
);
|
||||
|
||||
planner_context.insert_cte(&cte.name.value, logical_plan);
|
||||
}
|
||||
CteContent::Sql(_) => {
|
||||
|
||||
@@ -1222,12 +1222,13 @@ impl PromPlanner {
|
||||
let mut exprs = Vec::with_capacity(labels.labels.len());
|
||||
for label in &labels.labels {
|
||||
// nonexistent labels will be ignored
|
||||
if let Ok(field) = input_schema.field_with_unqualified_name(label) {
|
||||
exprs.push(DfExpr::Column(Column::from(field.name())));
|
||||
if let Some(column_name) = Self::find_case_sensitive_column(input_schema, label)
|
||||
{
|
||||
exprs.push(DfExpr::Column(Column::from_name(column_name.clone())));
|
||||
|
||||
if update_ctx {
|
||||
// update the tag columns in context
|
||||
self.ctx.tag_columns.push(label.clone());
|
||||
self.ctx.tag_columns.push(column_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1290,13 +1291,12 @@ impl PromPlanner {
|
||||
continue;
|
||||
}
|
||||
|
||||
let col = if table_schema
|
||||
.field_with_unqualified_name(&matcher.name)
|
||||
.is_err()
|
||||
{
|
||||
DfExpr::Literal(ScalarValue::Utf8(Some(String::new())), None).alias(matcher.name)
|
||||
let column_name = Self::find_case_sensitive_column(table_schema, matcher.name.as_str());
|
||||
let col = if let Some(column_name) = column_name {
|
||||
DfExpr::Column(Column::from_name(column_name))
|
||||
} else {
|
||||
DfExpr::Column(Column::from_name(matcher.name))
|
||||
DfExpr::Literal(ScalarValue::Utf8(Some(String::new())), None)
|
||||
.alias(matcher.name.clone())
|
||||
};
|
||||
let lit = DfExpr::Literal(ScalarValue::Utf8(Some(matcher.value)), None);
|
||||
let expr = match matcher.op {
|
||||
@@ -1353,6 +1353,14 @@ impl PromPlanner {
|
||||
Ok(exprs)
|
||||
}
|
||||
|
||||
fn find_case_sensitive_column(schema: &DFSchemaRef, column: &str) -> Option<String> {
|
||||
schema
|
||||
.fields()
|
||||
.iter()
|
||||
.find(|field| field.name() == column)
|
||||
.map(|field| field.name().clone())
|
||||
}
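A standalone sketch of the exact-match lookup introduced here (toy schema as a slice of field names; the real code walks a DFSchemaRef): the label is only resolved when a field name matches it character for character, with no case-folding fallback.

    /// Exact, case-sensitive lookup: returns the stored field name only on an exact match.
    fn find_case_sensitive_column(fields: &[&str], column: &str) -> Option<String> {
        fields.iter().find(|f| **f == column).map(|f| f.to_string())
    }

    fn main() {
        let fields = ["Host", "ts", "value"];
        assert_eq!(find_case_sensitive_column(&fields, "Host"), Some("Host".to_string()));
        // "host" does not match "Host": no case-insensitive fallback.
        assert_eq!(find_case_sensitive_column(&fields, "host"), None);
    }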
|
||||
|
||||
fn table_ref(&self) -> Result<TableReference> {
|
||||
let table_name = self
|
||||
.ctx
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::sync::{Arc, RwLock};
|
||||
use async_trait::async_trait;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_base::Plugins;
|
||||
use common_function::aggrs::aggr_wrapper::fix_order::FixStateUdafOrderingAnalyzer;
|
||||
use common_function::function_factory::ScalarFunctionFactory;
|
||||
use common_function::handlers::{
|
||||
FlowServiceHandlerRef, ProcedureServiceHandlerRef, TableMutationHandlerRef,
|
||||
@@ -136,6 +137,8 @@ impl QueryEngineState {
|
||||
analyzer.rules.push(Arc::new(DistPlannerAnalyzer));
|
||||
}
|
||||
|
||||
analyzer.rules.push(Arc::new(FixStateUdafOrderingAnalyzer));
|
||||
|
||||
let mut optimizer = Optimizer::new();
|
||||
optimizer.rules.push(Arc::new(ScanHintRule));
|
||||
|
||||
|
||||
@@ -1 +1 @@
v0.11.3
v0.11.6

@@ -71,8 +71,8 @@ impl JemallocCollector {
let _ = self.epoch.advance().context(UpdateJemallocMetricsSnafu)?;
let allocated = self.allocated.read().context(UpdateJemallocMetricsSnafu)?;
let resident = self.resident.read().context(UpdateJemallocMetricsSnafu)?;
SYS_JEMALLOC_RESIDEN.set(allocated as i64);
SYS_JEMALLOC_ALLOCATED.set(resident as i64);
SYS_JEMALLOC_ALLOCATED.set(allocated as i64);
SYS_JEMALLOC_RESIDEN.set(resident as i64);
Ok(())
}
}
|
||||
|
||||
@@ -16,7 +16,6 @@ use ahash::{HashMap, HashSet};
|
||||
use api::v1::{RowInsertRequests, Value};
|
||||
use common_grpc::precision::Precision;
|
||||
use common_query::prelude::{GREPTIME_COUNT, GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use otel_arrow_rust::proto::opentelemetry::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use otel_arrow_rust::proto::opentelemetry::common::v1::{any_value, AnyValue, KeyValue};
|
||||
@@ -251,10 +250,20 @@ fn process_scope_attrs(scope: &ScopeMetrics, metric_ctx: &OtlpMetricCtx) -> Opti
|
||||
|
||||
// See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/145942706622aba5c276ca47f48df438228bfea4/pkg/translator/prometheus/normalize_name.go#L55
|
||||
pub fn normalize_metric_name(metric: &Metric, metric_type: &MetricType) -> String {
|
||||
let mut name_tokens = NON_ALPHA_NUM_CHAR
|
||||
// Split the metric name into "tokens" (on non-alphanumeric characters), filtering out empty strings
|
||||
let mut name_tokens: Vec<String> = NON_ALPHA_NUM_CHAR
|
||||
.split(&metric.name)
|
||||
.map(|s| s.to_string())
|
||||
.collect_vec();
|
||||
.filter_map(|s| {
|
||||
let trimmed = s.trim();
|
||||
if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Append unit if it exists
|
||||
if !metric.unit.is_empty() {
|
||||
let (main, per) = build_unit_suffix(&metric.unit);
|
||||
if let Some(main) = main
|
||||
@@ -270,17 +279,24 @@ pub fn normalize_metric_name(metric: &Metric, metric_type: &MetricType) -> Strin
|
||||
}
|
||||
}
|
||||
|
||||
// Append _total for Counters (monotonic sums)
|
||||
if matches!(metric_type, MetricType::MonotonicSum) {
|
||||
// Remove existing "total" tokens first, then append
|
||||
name_tokens.retain(|t| t != TOTAL);
|
||||
name_tokens.push(TOTAL.to_string());
|
||||
}
|
||||
|
||||
// Append _ratio for metrics with unit "1" (gauges only)
|
||||
if metric.unit == "1" && matches!(metric_type, MetricType::Gauge) {
|
||||
// Remove existing "ratio" tokens first, then append
|
||||
name_tokens.retain(|t| t != RATIO);
|
||||
name_tokens.push(RATIO.to_string());
|
||||
}
|
||||
|
||||
// Build the string from the tokens, separated with underscores
|
||||
let name = name_tokens.join(UNDERSCORE);
|
||||
|
||||
// Metric name cannot start with a digit, so prefix it with "_" in this case
|
||||
if let Some((_, first)) = name.char_indices().next()
|
||||
&& first >= '0'
|
||||
&& first <= '9'
|
||||
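Condensed into one self-contained sketch, the normalization pipeline above works as follows (regex-free tokenizing; the unit handling is reduced to a tiny illustrative lookup, so the map and flags here are not the crate's real API): tokenize on non-alphanumerics, drop empty tokens, append the unit when it is usable, append "_total" for monotonic sums and "_ratio" for unit "1" gauges, join with underscores, and prefix names that would start with a digit.

    fn normalize_metric_name(name: &str, unit: &str, is_monotonic_sum: bool, is_gauge: bool) -> String {
        // Tokenize on non-alphanumerics and drop empty tokens.
        let mut tokens: Vec<String> = name
            .split(|c: char| !c.is_ascii_alphanumeric())
            .filter(|t| !t.is_empty())
            .map(str::to_string)
            .collect();

        // Append the unit, skipping empty, "1", or braced ("{...}") units.
        if !unit.is_empty() && unit != "1" && !unit.contains('{') && !unit.contains('}') {
            // Illustrative unit table; the real code maps UCUM units such as "By" -> "bytes", "s" -> "seconds".
            let mapped = match unit {
                "By" => "bytes",
                "s" => "seconds",
                other => other,
            };
            if !tokens.contains(&mapped.to_string()) {
                tokens.push(mapped.to_string());
            }
        }

        // Counters get "_total", unit-"1" gauges get "_ratio"; existing tokens are de-duplicated first.
        if is_monotonic_sum {
            tokens.retain(|t| t != "total");
            tokens.push("total".to_string());
        }
        if unit == "1" && is_gauge {
            tokens.retain(|t| t != "ratio");
            tokens.push("ratio".to_string());
        }

        let joined = tokens.join("_");
        // A metric name cannot start with a digit.
        if joined.chars().next().is_some_and(|c| c.is_ascii_digit()) {
            format!("_{joined}")
        } else {
            joined
        }
    }

    fn main() {
        assert_eq!(normalize_metric_name("system.disk.io", "By", true, false), "system_disk_io_bytes_total");
        assert_eq!(normalize_metric_name("system.load.1", "1", false, true), "system_load_1_ratio");
        assert_eq!(normalize_metric_name("2xx_requests", "", false, false), "_2xx_requests");
    }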
@@ -298,7 +314,8 @@ fn build_unit_suffix(unit: &str) -> (Option<String>, Option<String>) {
|
||||
|
||||
fn check_unit(unit_str: &str, unit_map: &HashMap<String, String>) -> Option<String> {
|
||||
let u = unit_str.trim();
|
||||
if !u.is_empty() && !u.contains("{}") {
|
||||
// Skip units that are empty, contain "{" or "}" characters
|
||||
if !u.is_empty() && !u.contains('{') && !u.contains('}') {
|
||||
let u = unit_map.get(u).map(|s| s.as_ref()).unwrap_or(u);
|
||||
let u = clean_unit_name(u);
|
||||
if !u.is_empty() {
|
||||
@@ -309,7 +326,13 @@ fn check_unit(unit_str: &str, unit_map: &HashMap<String, String>) -> Option<Stri
|
||||
}
|
||||
|
||||
fn clean_unit_name(name: &str) -> String {
|
||||
NON_ALPHA_NUM_CHAR.split(name).join(UNDERSCORE)
|
||||
// Split on non-alphanumeric characters, filter out empty strings, then join with underscores
|
||||
// This matches the Go implementation: strings.FieldsFunc + strings.Join
|
||||
NON_ALPHA_NUM_CHAR
|
||||
.split(name)
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect::<Vec<&str>>()
|
||||
.join(UNDERSCORE)
|
||||
}
|
||||
|
||||
// See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/145942706622aba5c276ca47f48df438228bfea4/pkg/translator/prometheus/normalize_label.go#L27
|
||||
@@ -1037,6 +1060,57 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_metric_name_edge_cases() {
|
||||
let test_cases = vec![
|
||||
// Edge case: name with multiple non-alphanumeric chars in a row
|
||||
(
|
||||
Metric {
|
||||
name: "foo--bar__baz".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_bar_baz",
|
||||
),
|
||||
// Edge case: name starting and ending with non-alphanumeric
|
||||
(
|
||||
Metric {
|
||||
name: "-foo_bar-".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_bar",
|
||||
),
|
||||
// Edge case: name with only special chars (should be empty)
|
||||
(
|
||||
Metric {
|
||||
name: "--___--".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"",
|
||||
),
|
||||
// Edge case: name starting with digit
|
||||
(
|
||||
Metric {
|
||||
name: "2xx_requests".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"_2xx_requests",
|
||||
),
|
||||
];
|
||||
|
||||
for (metric, metric_type, expected) in test_cases {
|
||||
let result = normalize_metric_name(&metric, &metric_type);
|
||||
assert_eq!(
|
||||
result, expected,
|
||||
"Failed for metric name: '{}', unit: '{}', type: {:?}",
|
||||
metric.name, metric.unit, metric_type
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_label_name() {
|
||||
let test_cases = vec![
|
||||
@@ -1058,6 +1132,320 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clean_unit_name() {
|
||||
// Test the improved clean_unit_name function
|
||||
assert_eq!(clean_unit_name("faults"), "faults");
|
||||
assert_eq!(clean_unit_name("{faults}"), "faults"); // clean_unit_name still processes braces internally
|
||||
assert_eq!(clean_unit_name("req/sec"), "req_sec");
|
||||
assert_eq!(clean_unit_name("m/s"), "m_s");
|
||||
assert_eq!(clean_unit_name("___test___"), "test");
|
||||
assert_eq!(
|
||||
clean_unit_name("multiple__underscores"),
|
||||
"multiple_underscores"
|
||||
);
|
||||
assert_eq!(clean_unit_name(""), "");
|
||||
assert_eq!(clean_unit_name("___"), "");
|
||||
assert_eq!(clean_unit_name("bytes.per.second"), "bytes_per_second");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_metric_name_braced_units() {
|
||||
// Test that units with braces are rejected (not processed)
|
||||
let test_cases = vec![
|
||||
(
|
||||
Metric {
|
||||
name: "test.metric".to_string(),
|
||||
unit: "{faults}".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"test_metric_total", // braced units are rejected, no unit suffix added
|
||||
),
|
||||
(
|
||||
Metric {
|
||||
name: "test.metric".to_string(),
|
||||
unit: "{operations}".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Gauge,
|
||||
"test_metric", // braced units are rejected, no unit suffix added
|
||||
),
|
||||
(
|
||||
Metric {
|
||||
name: "test.metric".to_string(),
|
||||
unit: "{}".to_string(), // empty braces should be ignored due to contains('{') || contains('}')
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Gauge,
|
||||
"test_metric",
|
||||
),
|
||||
(
|
||||
Metric {
|
||||
name: "test.metric".to_string(),
|
||||
unit: "faults".to_string(), // no braces, should work normally
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Gauge,
|
||||
"test_metric_faults",
|
||||
),
|
||||
];
|
||||
|
||||
for (metric, metric_type, expected) in test_cases {
|
||||
let result = normalize_metric_name(&metric, &metric_type);
|
||||
assert_eq!(
|
||||
result, expected,
|
||||
"Failed for metric name: '{}', unit: '{}', type: {:?}. Got: '{}', Expected: '{}'",
|
||||
metric.name, metric.unit, metric_type, result, expected
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_normalize_metric_name_with_testdata() {
    // Test cases extracted from real OTLP metrics data from testdata.txt
    let test_cases = vec![
        // Basic system metrics with various units
        (
            Metric {
                name: "system.paging.faults".to_string(),
                unit: "{faults}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_paging_faults_total", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.paging.operations".to_string(),
                unit: "{operations}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_paging_operations_total", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.paging.usage".to_string(),
                unit: "By".to_string(),
                ..Default::default()
            },
            MetricType::NonMonotonicSum,
            "system_paging_usage_bytes",
        ),
        // Load average metrics - gauge with custom unit
        (
            Metric {
                name: "system.cpu.load_average.15m".to_string(),
                unit: "{thread}".to_string(),
                ..Default::default()
            },
            MetricType::Gauge,
            "system_cpu_load_average_15m", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.cpu.load_average.1m".to_string(),
                unit: "{thread}".to_string(),
                ..Default::default()
            },
            MetricType::Gauge,
            "system_cpu_load_average_1m", // braced units are rejected, no unit suffix added
        ),
        // Disk I/O with bytes unit
        (
            Metric {
                name: "system.disk.io".to_string(),
                unit: "By".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_disk_io_bytes_total",
        ),
        // Time-based metrics with seconds unit
        (
            Metric {
                name: "system.disk.io_time".to_string(),
                unit: "s".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_disk_io_time_seconds_total",
        ),
        (
            Metric {
                name: "system.disk.operation_time".to_string(),
                unit: "s".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_disk_operation_time_seconds_total",
        ),
        // CPU time metric
        (
            Metric {
                name: "system.cpu.time".to_string(),
                unit: "s".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_cpu_time_seconds_total",
        ),
        // Process counts
        (
            Metric {
                name: "system.processes.count".to_string(),
                unit: "{processes}".to_string(),
                ..Default::default()
            },
            MetricType::NonMonotonicSum,
            "system_processes_count", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.processes.created".to_string(),
                unit: "{processes}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_processes_created_total", // braced units are rejected, no unit suffix added
        ),
        // Memory usage with bytes
        (
            Metric {
                name: "system.memory.usage".to_string(),
                unit: "By".to_string(),
                ..Default::default()
            },
            MetricType::NonMonotonicSum,
            "system_memory_usage_bytes",
        ),
        // Uptime as gauge
        (
            Metric {
                name: "system.uptime".to_string(),
                unit: "s".to_string(),
                ..Default::default()
            },
            MetricType::Gauge,
            "system_uptime_seconds",
        ),
        // Network metrics
        (
            Metric {
                name: "system.network.connections".to_string(),
                unit: "{connections}".to_string(),
                ..Default::default()
            },
            MetricType::NonMonotonicSum,
            "system_network_connections", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.network.dropped".to_string(),
                unit: "{packets}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_network_dropped_total", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.network.errors".to_string(),
                unit: "{errors}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_network_errors_total", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.network.io".to_string(),
                unit: "By".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_network_io_bytes_total",
        ),
        (
            Metric {
                name: "system.network.packets".to_string(),
                unit: "{packets}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "system_network_packets_total", // braced units are rejected, no unit suffix added
        ),
        // Filesystem metrics
        (
            Metric {
                name: "system.filesystem.inodes.usage".to_string(),
                unit: "{inodes}".to_string(),
                ..Default::default()
            },
            MetricType::NonMonotonicSum,
            "system_filesystem_inodes_usage", // braced units are rejected, no unit suffix added
        ),
        (
            Metric {
                name: "system.filesystem.usage".to_string(),
                unit: "By".to_string(),
                ..Default::default()
            },
            MetricType::NonMonotonicSum,
            "system_filesystem_usage_bytes",
        ),
        // Edge cases with special characters and numbers
        (
            Metric {
                name: "system.load.1".to_string(),
                unit: "1".to_string(),
                ..Default::default()
            },
            MetricType::Gauge,
            "system_load_1_ratio",
        ),
        (
            Metric {
                name: "http.request.2xx".to_string(),
                unit: "{requests}".to_string(),
                ..Default::default()
            },
            MetricType::MonotonicSum,
            "http_request_2xx_total", // braced units are rejected, no unit suffix added
        ),
        // Metric with dots and underscores mixed
        (
            Metric {
                name: "jvm.memory.heap_usage".to_string(),
                unit: "By".to_string(),
                ..Default::default()
            },
            MetricType::Gauge,
            "jvm_memory_heap_usage_bytes",
        ),
        // Complex unit with per-second
        (
            Metric {
                name: "http.request.rate".to_string(),
                unit: "1/s".to_string(),
                ..Default::default()
            },
            MetricType::Gauge,
            "http_request_rate_per_second",
        ),
    ];

    for (metric, metric_type, expected) in test_cases {
        let result = normalize_metric_name(&metric, &metric_type);
        assert_eq!(
            result, expected,
            "Failed for metric name: '{}', unit: '{}', type: {:?}. Got: '{}', Expected: '{}'",
            metric.name, metric.unit, metric_type, result, expected
        );
    }
}
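
// NOTE: the helper below is an illustrative sketch, not the actual `normalize_metric_name`
// implementation under test. It only restates the conventions the cases above exercise:
// dots become underscores, known units map to a suffix ("By" -> "bytes", "s" -> "seconds",
// "1" -> "ratio", "1/s" -> "per_second"), braced units such as "{faults}" add no suffix,
// and monotonic sums get a trailing "_total".
fn sketch_normalize_metric_name(name: &str, unit: &str, is_monotonic_sum: bool) -> String {
    let mut out = name.replace('.', "_");
    // Hypothetical unit-to-suffix table; the real mapping is larger.
    let suffix = match unit {
        "By" => Some("bytes"),
        "s" => Some("seconds"),
        "1" => Some("ratio"),
        "1/s" => Some("per_second"),
        u if u.starts_with('{') && u.ends_with('}') => None, // braced units are rejected
        _ => None,
    };
    if let Some(suffix) = suffix {
        if !out.ends_with(suffix) {
            out.push('_');
            out.push_str(suffix);
        }
    }
    if is_monotonic_sum && !out.ends_with("_total") {
        out.push_str("_total");
    }
    out
}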

fn keyvalue(key: &str, value: &str) -> KeyValue {
    KeyValue {
        key: key.into(),
@@ -292,11 +292,18 @@ impl<'a> ParserContext<'a> {

        let output_table_name = self.intern_parse_table_name()?;

        let expire_after = if self
            .parser
            .consume_tokens(&[Token::make_keyword(EXPIRE), Token::make_keyword(AFTER)])
        let expire_after = if let Token::Word(w1) = &self.parser.peek_token().token
            && w1.value.eq_ignore_ascii_case(EXPIRE)
        {
            Some(self.parse_interval_no_month("EXPIRE AFTER")?)
            self.parser.next_token();
            if let Token::Word(w2) = &self.parser.peek_token().token
                && w2.value.eq_ignore_ascii_case(AFTER)
            {
                self.parser.next_token();
                Some(self.parse_interval_no_month("EXPIRE AFTER")?)
            } else {
                None
            }
        } else {
            None
        };
@@ -1500,6 +1507,45 @@ SELECT max(c1), min(c2) FROM schema_2.table_2;",
                comment: None,
            },
        ),
        (
            r"
create flow `task_3`
sink to schema_1.table_1
expire after '10 minutes'
as
select max(c1), min(c2) from schema_2.table_2;",
            CreateFlowWoutQuery {
                flow_name: ObjectName::from(vec![Ident::with_quote('`', "task_3")]),
                sink_table_name: ObjectName::from(vec![
                    Ident::new("schema_1"),
                    Ident::new("table_1"),
                ]),
                or_replace: false,
                if_not_exists: false,
                expire_after: Some(600), // 10 minutes in seconds
                comment: None,
            },
        ),
        (
            r"
create or replace flow if not exists task_4
sink to schema_1.table_1
expire after interval '2 hours'
comment 'lowercase test'
as
select max(c1), min(c2) from schema_2.table_2;",
            CreateFlowWoutQuery {
                flow_name: ObjectName::from(vec![Ident::new("task_4")]),
                sink_table_name: ObjectName::from(vec![
                    Ident::new("schema_1"),
                    Ident::new("table_1"),
                ]),
                or_replace: true,
                if_not_exists: true,
                expire_after: Some(7200), // 2 hours in seconds
                comment: Some("lowercase test".to_string()),
            },
        ),
    ];

    for (sql, expected) in testcases {

@@ -34,7 +34,6 @@ const FORMAT: &str = "FORMAT";

use sqlparser::parser::Parser;

use crate::dialect::GreptimeDbDialect;
use crate::parsers::error::{
    ConvertToLogicalExpressionSnafu, EvaluationSnafu, ParserSnafu, TQLError,
};
@@ -106,36 +105,49 @@ impl ParserContext<'_> {
        let (start, end, step, lookback) = match parser.peek_token().token {
            Token::LParen => {
                let _consume_lparen_token = parser.next_token();
                let start = Self::parse_string_or_number_or_word(
                    parser,
                    &[Token::Comma],
                    require_now_expr,
                )?
                .0;
                let end = Self::parse_string_or_number_or_word(
                    parser,
                    &[Token::Comma],
                    require_now_expr,
                )?
                .0;
                let exprs = parser
                    .parse_comma_separated(Parser::parse_expr)
                    .context(ParserSnafu)?;

                let (step, delimiter) = Self::parse_string_or_number_or_word(
                    parser,
                    &[Token::Comma, Token::RParen],
                    false,
                let param_count = exprs.len();

                if param_count != 3 && param_count != 4 {
                    return Err(ParserError::ParserError(
                        format!("Expected 3 or 4 expressions in TQL parameters (start, end, step, [lookback]), but found {}", param_count)
                    ))
                    .context(ParserSnafu);
                }

                let mut exprs_iter = exprs.into_iter();
                // Safety: safe to call next and unwrap, because we already check the param_count above.
                let start = Self::parse_expr_to_literal_or_ts(
                    exprs_iter.next().unwrap(),
                    require_now_expr,
                )?;
                let lookback = if delimiter == Token::Comma {
                    Self::parse_string_or_number_or_word(parser, &[Token::RParen], false)
                        .ok()
                        .map(|t| t.0)
                } else {
                    None
                };
                let end = Self::parse_expr_to_literal_or_ts(
                    exprs_iter.next().unwrap(),
                    require_now_expr,
                )?;
                let step = Self::parse_expr_to_literal_or_ts(exprs_iter.next().unwrap(), false)?;

                let lookback = exprs_iter
                    .next()
                    .map(|expr| Self::parse_expr_to_literal_or_ts(expr, false))
                    .transpose()?;

                if !parser.consume_token(&Token::RParen) {
                    return Err(ParserError::ParserError(format!(
                        "Expected ')' after TQL parameters, but found: {}",
                        parser.peek_token()
                    )))
                    .context(ParserSnafu);
                }

                (start, end, step, lookback)
            }
            _ => ("0".to_string(), "0".to_string(), "5m".to_string(), None),
        };

        let query = Self::parse_tql_query(parser, self.sql).context(ParserSnafu)?;
        Ok(TqlParameters::new(start, end, step, lookback, query))
    }
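
// NOTE: illustrative sketch only, not part of the parser above. It restates the new
// arity rule: TQL accepts exactly 3 or 4 comma-separated parameter expressions,
// interpreted as (start, end, step) plus an optional trailing lookback.
fn sketch_split_tql_params(
    exprs: Vec<String>,
) -> Result<(String, String, String, Option<String>), String> {
    if exprs.len() != 3 && exprs.len() != 4 {
        return Err(format!(
            "Expected 3 or 4 expressions in TQL parameters (start, end, step, [lookback]), but found {}",
            exprs.len()
        ));
    }
    let mut it = exprs.into_iter();
    // Safe to unwrap: the arity was checked above.
    let start = it.next().unwrap();
    let end = it.next().unwrap();
    let step = it.next().unwrap();
    let lookback = it.next(); // `None` for the 3-parameter form.
    Ok((start, end, step, lookback))
}
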
@@ -179,72 +191,43 @@ impl ParserContext<'_> {
        }
    }

    /// Try to parse and consume a string, number or word token.
    /// Return `Ok` if it's parsed and one of the given delimiter tokens is consumed.
    /// The string and matched delimiter will be returned as a tuple.
    fn parse_string_or_number_or_word(
        parser: &mut Parser,
        delimiter_tokens: &[Token],
        require_now_expr: bool,
    ) -> std::result::Result<(String, Token), TQLError> {
        let mut tokens = vec![];

        while !delimiter_tokens.contains(&parser.peek_token().token) {
            let token = parser.next_token().token;
            if matches!(token, Token::EOF) {
                break;
            }
            tokens.push(token);
        }
        let result = match tokens.len() {
            0 => Err(ParserError::ParserError(
                "Expected at least one token".to_string(),
            ))
            .context(ParserSnafu),
            1 => {
                let value = match tokens[0].clone() {
                    Token::Number(n, _) if !require_now_expr => n,
                    Token::DoubleQuotedString(s) | Token::SingleQuotedString(s)
                        if !require_now_expr =>
                    {
                        s
                    }
                    Token::Word(_) => Self::parse_tokens_to_ts(tokens, require_now_expr)?,
                    unexpected => {
                        if !require_now_expr {
                            return Err(ParserError::ParserError(format!(
                                "Expected number, string or word, but have {unexpected:?}"
                            )))
                            .context(ParserSnafu);
                        } else {
                            return Err(ParserError::ParserError(format!(
                                "Expected expression containing `now()`, but have {unexpected:?}"
                            )))
                            .context(ParserSnafu);
                        }
                    }
                };
                Ok(value)
            }
            _ => Self::parse_tokens_to_ts(tokens, require_now_expr),
        };
        for token in delimiter_tokens {
            if parser.consume_token(token) {
                return result.map(|v| (v, token.clone()));
            }
        }
        Err(ParserError::ParserError(format!(
            "Delimiters not match {delimiter_tokens:?}"
        )))
        .context(ParserSnafu)
    }

    /// Parse the tokens to seconds and convert to string.
    fn parse_tokens_to_ts(
        tokens: Vec<Token>,
    /// Parse the expression to a literal string or a timestamp in seconds.
    fn parse_expr_to_literal_or_ts(
        parser_expr: sqlparser::ast::Expr,
        require_now_expr: bool,
    ) -> std::result::Result<String, TQLError> {
        match parser_expr {
            sqlparser::ast::Expr::Value(v) => match v.value {
                sqlparser::ast::Value::Number(s, _) if !require_now_expr => Ok(s),
                sqlparser::ast::Value::DoubleQuotedString(s)
                | sqlparser::ast::Value::SingleQuotedString(s)
                    if !require_now_expr =>
                {
                    Ok(s)
                }
                unexpected => {
                    if !require_now_expr {
                        Err(ParserError::ParserError(format!(
                            "Expected number, string or word, but have {unexpected:?}"
                        )))
                        .context(ParserSnafu)
                    } else {
                        Err(ParserError::ParserError(format!(
                            "Expected expression containing `now()`, but have {unexpected:?}"
                        )))
                        .context(ParserSnafu)
                    }
                }
            },
            _ => Self::parse_expr_to_ts(parser_expr, require_now_expr),
        }
    }

    /// Parse the expression to a timestamp in seconds.
    fn parse_expr_to_ts(
        parser_expr: sqlparser::ast::Expr,
        require_now_expr: bool,
    ) -> std::result::Result<String, TQLError> {
        let parser_expr = Self::parse_to_expr(tokens)?;
        let lit = utils::parser_expr_to_scalar_value_literal(parser_expr, require_now_expr)
            .map_err(Box::new)
            .context(ConvertToLogicalExpressionSnafu)?;
@@ -267,13 +250,6 @@ impl ParserContext<'_> {
        })
    }

    fn parse_to_expr(tokens: Vec<Token>) -> std::result::Result<sqlparser::ast::Expr, TQLError> {
        Parser::new(&GreptimeDbDialect {})
            .with_tokens(tokens)
            .parse_expr()
            .context(ParserSnafu)
    }

    fn parse_tql_query(parser: &mut Parser, sql: &str) -> std::result::Result<String, ParserError> {
        while matches!(parser.peek_token().token, Token::Comma) {
            let _skip_token = parser.next_token();
@@ -405,6 +381,60 @@ mod tests {
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_tql_eval_with_date_trunc() {
        let sql = "TQL EVAL (date_trunc('day', now() - interval '1' day), date_trunc('day', now()), '1h') http_requests_total{environment=~'staging|testing|development',method!='GET'} @ 1609746000 offset 5m";
        let statement = parse_into_statement(sql);
        match statement {
            Statement::Tql(Tql::Eval(eval)) => {
                // date_trunc('day', now() - interval '1' day) should resolve to start of yesterday
                // date_trunc('day', now()) should resolve to start of today
                // The exact values depend on when the test runs, but we can verify the structure
                assert!(eval.start.parse::<i64>().is_ok());
                assert!(eval.end.parse::<i64>().is_ok());
                assert_eq!(eval.step, "1h");
                assert_eq!(eval.lookback, None);
                assert_eq!(
                    eval.query,
                    "http_requests_total{environment=~'staging|testing|development',method!='GET'} @ 1609746000 offset 5m"
                );
            }
            _ => unreachable!(),
        }

        // Test with 4 parameters including lookback
        let sql = "TQL EVAL (date_trunc('hour', now() - interval '6' hour), date_trunc('hour', now()), '30m', '5m') cpu_usage_total";
        let statement = parse_into_statement(sql);
        match statement {
            Statement::Tql(Tql::Eval(eval)) => {
                assert!(eval.start.parse::<i64>().is_ok());
                assert!(eval.end.parse::<i64>().is_ok());
                assert_eq!(eval.step, "30m");
                assert_eq!(eval.lookback, Some("5m".to_string()));
                assert_eq!(eval.query, "cpu_usage_total");
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_parse_tql_analyze_with_date_trunc() {
        let sql = "TQL ANALYZE VERBOSE FORMAT JSON (date_trunc('week', now() - interval '2' week), date_trunc('week', now()), '4h', '1h') network_bytes_total";
        let statement = parse_into_statement(sql);
        match statement {
            Statement::Tql(Tql::Analyze(analyze)) => {
                assert!(analyze.start.parse::<i64>().is_ok());
                assert!(analyze.end.parse::<i64>().is_ok());
                assert_eq!(analyze.step, "4h");
                assert_eq!(analyze.lookback, Some("1h".to_string()));
                assert_eq!(analyze.query, "network_bytes_total");
                assert!(analyze.is_verbose);
                assert_eq!(analyze.format, Some(AnalyzeFormat::JSON));
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_parse_tql_eval() {
        let sql = "TQL EVAL (1676887657, 1676887659, '1m') http_requests_total{environment=~'staging|testing|development',method!='GET'} @ 1609746000 offset 5m";
@@ -901,17 +931,26 @@ mod tests {
        let sql = "TQL EVAL (1676887657, 1676887659, 1m) http_requests_total{environment=~'staging|testing|development',method!='GET'} @ 1609746000 offset 5m";
        let result =
            ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
        assert!(result
            .output_msg()
            .contains("Failed to extract a timestamp value"));

        assert!(
            result
                .output_msg()
                .contains("Expected ')' after TQL parameters, but found: m"),
            "{}",
            result.output_msg()
        );

        // missing end
        let sql = "TQL EVAL (1676887657, '1m') http_requests_total{environment=~'staging|testing|development',method!='GET'} @ 1609746000 offset 5m";
        let result =
            ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
        assert!(result
            .output_msg()
            .contains("Failed to extract a timestamp value"));
        assert!(
            result
                .output_msg()
                .contains("Expected 3 or 4 expressions in TQL parameters"),
            "{}",
            result.output_msg()
        );

        // empty TQL query
        let sql = "TQL EVAL (0, 30, '10s')";
@@ -923,6 +962,12 @@ mod tests {
        let sql = "tql eval (0, 0, '1s) t;;';";
        let result =
            ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
        assert!(result.output_msg().contains("Delimiters not match"));
        assert!(
            result
                .output_msg()
                .contains("Expected ')' after TQL parameters, but found: ;"),
            "{}",
            result.output_msg()
        );
    }
}

@@ -14,13 +14,17 @@ Affected Rows: 0
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
Affected Rows: 5
|
||||
Affected Rows: 9
|
||||
|
||||
SELECT
|
||||
count(i),
|
||||
@@ -33,7 +37,7 @@ FROM
|
||||
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
|
||||
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| 5 | 15 | 2.9742334234767016 | 5 |
|
||||
| 9 | 31 | 2.9742334234767016 | 6 |
|
||||
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -122,11 +126,11 @@ SELECT
|
||||
FROM
|
||||
integers;
|
||||
|
||||
+-----------------+
|
||||
| avg(integers.i) |
|
||||
+-----------------+
|
||||
| 3.0 |
|
||||
+-----------------+
|
||||
+--------------------+
|
||||
| avg(integers.i) |
|
||||
+--------------------+
|
||||
| 3.4444444444444446 |
|
||||
+--------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
@@ -214,10 +218,10 @@ ORDER BY
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| ts | count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| 2023-01-01T00:00:00 | 2 | 6 | 5.002829575110705 | 2 |
|
||||
| 2023-01-01T00:00:00 | 4 | 11 | 2.9742334234767016 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 | 2 | 1.9936617014173446 | 1 |
|
||||
| 2023-01-01T02:00:00 | 1 | 3 | 2.9742334234767016 | 1 |
|
||||
| 2023-01-01T03:00:00 | 1 | 4 | 4.014835333028587 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 | 7 | 4.014835333028587 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 | 11 | 5.98951037117262 | 2 |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -321,6 +325,129 @@ ORDER BY
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| time_window | count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| 2023-01-01T00:00:00 | 4 | 11 | 2.9742334234767016 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 | 2 | 1.9936617014173446 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 | 7 | 4.014835333028587 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 | 11 | 5.98951037117262 | 2 |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST_|
|
||||
|_|_Projection: date_bin(Utf8("2 seconds"),integers.ts) AS time_window, count(integers.i), sum(integers.i), uddsketch_calc(Float64(0.5), uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i))_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("2 seconds"),integers.ts)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i), __sum_merge(__sum_state(integers.i)) AS sum(integers.i), __uddsketch_state_merge(__uddsketch_state_state(Int64(128),Float64(0.01),integers.i)) AS uddsketch_state(Int64(128),Float64(0.01),integers.i), __hll_merge(__hll_state(integers.i)) AS hll(integers.i)]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("2 seconds") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128), Float64(0.01), CAST(integers.i AS Float64)), __hll_state(CAST(integers.i AS Utf8))]]_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[time_window@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_ProjectionExec: expr=[date_bin(Utf8("2 seconds"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i), sum(integers.i)@2 as sum(integers.i), uddsketch_calc(0.5, uddsketch_state(Int64(128),Float64(0.01),integers.i)@3) as uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i)@4) as hll_count(hll(integers.i))]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
EXPLAIN ANALYZE
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_ProjectionExec: expr=[date_bin(Utf8("2 seconds"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i), sum(integers.i)@2 as sum(integers.i), uddsketch_calc(0.5, uddsketch_state(Int64(128),Float64(0.01),integers.i)@3) as uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i)@4) as hll_count(hll(integers.i))] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -12,11 +12,15 @@ CREATE TABLE integers(
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
SELECT
|
||||
count(i),
|
||||
@@ -142,4 +146,60 @@ GROUP BY
|
||||
ORDER BY
|
||||
ts;
|
||||
|
||||
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
EXPLAIN ANALYZE
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
@@ -1037,3 +1037,277 @@ drop table aggr_optimize_not;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
--
|
||||
-- Additional test cases for step aggregation pushdown
|
||||
--
|
||||
CREATE TABLE step_aggr_extended (
|
||||
pk_col_1 STRING,
|
||||
pk_col_2 BIGINT,
|
||||
val_col_1 BIGINT,
|
||||
val_col_2 STRING,
|
||||
val_col_3 BIGINT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY(pk_col_1, pk_col_2)
|
||||
) PARTITION ON COLUMNS (pk_col_1) (
|
||||
pk_col_1 < 'f',
|
||||
pk_col_1 >= 'f'
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO step_aggr_extended VALUES
|
||||
('a', 1, 100, 'v1', 10, 1672531200000),
|
||||
('a', 2, 200, 'v2', NULL, 1672531201000),
|
||||
('g', 1, 300, 'v1', 30, 1672531202000),
|
||||
('g', 2, 400, 'v2', 40, 1672531203000),
|
||||
('a', 3, 100, 'v3', 10, 1672531204000),
|
||||
('g', 3, 300, 'v3', 30, 1672531205000),
|
||||
('h', 4, 500, NULL, 50, 1672531206000);
|
||||
|
||||
Affected Rows: 7
|
||||
|
||||
-- Case 12: GROUP BY includes a mix of partition key and non-partition key.
|
||||
-- `pk_col_1` is a partition key, `pk_col_2` is not.
|
||||
-- This should pushdown entire aggregation to datanodes since it's partitioned by `pk_col_1`.
|
||||
-- Expected: Full pushdown of aggregation to datanodes.
|
||||
SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
+----------+----------+-----------------------------------+
|
||||
| pk_col_1 | pk_col_2 | sum(step_aggr_extended.val_col_1) |
|
||||
+----------+----------+-----------------------------------+
|
||||
| a | 1 | 100 |
|
||||
| a | 2 | 200 |
|
||||
| a | 3 | 100 |
|
||||
| g | 1 | 300 |
|
||||
| g | 2 | 400 |
|
||||
| g | 3 | 300 |
|
||||
| h | 4 | 500 |
|
||||
+----------+----------+-----------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: step_aggr_extended.pk_col_1 ASC NULLS LAST, step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: step_aggr_extended.pk_col_1 ASC NULLS LAST, step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|
||||
|_|_Projection: step_aggr_extended.pk_col_1, step_aggr_extended.pk_col_2, sum(step_aggr_extended.val_col_1)_|
|
||||
|_|_Aggregate: groupBy=[[step_aggr_extended.pk_col_1, step_aggr_extended.pk_col_2]], aggr=[[sum(step_aggr_extended.val_col_1)]] |
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [pk_col_1@0 ASC NULLS LAST, pk_col_2@1 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 13: COUNT(DISTINCT) aggregation.
|
||||
-- `DISTINCT` aggregation is more complex and requires a two-phase distinct calculation in a distributed environment. Currently not supported for pushdown.
|
||||
-- Expected: datanode only do table scan, actual aggregation happens on frontend.
|
||||
SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
+----------------------------------------------+
|
||||
| count(DISTINCT step_aggr_extended.val_col_1) |
|
||||
+----------------------------------------------+
|
||||
| 5 |
|
||||
+----------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Projection: count(alias1) AS count(DISTINCT step_aggr_extended.val_col_1)_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[count(alias1)]]_|
|
||||
|_|_Aggregate: groupBy=[[step_aggr_extended.val_col_1 AS alias1]], aggr=[[]]_|
|
||||
|_|_Projection: step_aggr_extended.val_col_1_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT step_aggr_extended.val_col_1)]_|
|
||||
|_|_AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[val_col_1@0 as alias1], aggr=[]_|
|
||||
|_|_ProjectionExec: expr=[val_col_1@2 as val_col_1]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 14: Aggregation with a HAVING clause.
|
||||
-- The `HAVING` clause filters results after aggregation.
|
||||
-- Expected: The `HAVING` filter should be applied on the frontend after the final aggregation is complete, not pushed down to datanodes.
|
||||
SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
+----------+-----------------------------------+
|
||||
| pk_col_2 | sum(step_aggr_extended.val_col_1) |
|
||||
+----------+-----------------------------------+
|
||||
| 1 | 400 |
|
||||
| 2 | 600 |
|
||||
| 3 | 400 |
|
||||
| 4 | 500 |
|
||||
+----------+-----------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|
||||
|_|_Filter: sum(step_aggr_extended.val_col_1) > Int64(300)_|
|
||||
|_|_Aggregate: groupBy=[[step_aggr_extended.pk_col_2]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_1)) AS sum(step_aggr_extended.val_col_1)]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[step_aggr_extended.pk_col_2]], aggr=[[__sum_state(step_aggr_extended.val_col_1)]]_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [pk_col_2@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[pk_col_2@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_FilterExec: sum(step_aggr_extended.val_col_1)@1 > 300_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[pk_col_2@0 as pk_col_2], aggr=[sum(step_aggr_extended.val_col_1)]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[pk_col_2@0 as pk_col_2], aggr=[sum(step_aggr_extended.val_col_1)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 15: Aggregation on a column with NULL values.
|
||||
-- `SUM` should ignore NULLs. `COUNT(val_col_2)` should count non-nulls, `COUNT(*)` should count all rows.
|
||||
-- Expected: Correct aggregation results, proving NULLs are handled properly in a distributed context.
|
||||
SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
|
||||
| sum(step_aggr_extended.val_col_3) | count(step_aggr_extended.val_col_2) | count(step_aggr_extended.val_col_3) | count(*) |
|
||||
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
|
||||
| 170 | 6 | 6 | 7 |
|
||||
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Projection: sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1)) AS count(*)_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_3)) AS sum(step_aggr_extended.val_col_3), __count_merge(__count_state(step_aggr_extended.val_col_2)) AS count(step_aggr_extended.val_col_2), __count_merge(__count_state(step_aggr_extended.val_col_3)) AS count(step_aggr_extended.val_col_3), __count_merge(__count_state(step_aggr_extended.ts)) AS count(Int64(1))]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__sum_state(step_aggr_extended.val_col_3), __count_state(step_aggr_extended.val_col_2), __count_state(step_aggr_extended.val_col_3), __count_state(step_aggr_extended.ts)]]_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | ProjectionExec: expr=[sum(step_aggr_extended.val_col_3)@0 as sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2)@1 as count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3)@2 as count(step_aggr_extended.val_col_3), count(Int64(1))@3 as count(*)]_|
|
||||
|_|_AggregateExec: mode=Final, gby=[], aggr=[sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1))]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1))]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 16: Aggregation on STRING columns.
|
||||
-- `MIN` and `MAX` can operate on strings.
|
||||
-- Expected: Correct lexicographical min/max results.
|
||||
SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
+----------------------------------+-----------------------------------+
|
||||
| min(step_aggr_extended.pk_col_1) | max(step_aggr_extended.val_col_2) |
|
||||
+----------------------------------+-----------------------------------+
|
||||
| a | v3 |
|
||||
+----------------------------------+-----------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__min_merge(__min_state(step_aggr_extended.pk_col_1)) AS min(step_aggr_extended.pk_col_1), __max_merge(__max_state(step_aggr_extended.val_col_2)) AS max(step_aggr_extended.val_col_2)]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__min_state(step_aggr_extended.pk_col_1), __max_state(step_aggr_extended.val_col_2)]]_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[min(step_aggr_extended.pk_col_1), max(step_aggr_extended.val_col_2)]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[min(step_aggr_extended.pk_col_1), max(step_aggr_extended.val_col_2)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 17: Aggregation on an empty input set.
|
||||
-- `WHERE` clause filters out all rows.
|
||||
-- Expected: Aggregation should return correct default values (e.g., COUNT is 0, SUM is NULL).
|
||||
SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
+-----------------------------------+----------+
|
||||
| sum(step_aggr_extended.val_col_1) | count(*) |
|
||||
+-----------------------------------+----------+
|
||||
| | 0 |
|
||||
+-----------------------------------+----------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Projection: sum(step_aggr_extended.val_col_1), count(Int64(1)) AS count(*)_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_1)) AS sum(step_aggr_extended.val_col_1), __count_merge(__count_state(step_aggr_extended.ts)) AS count(Int64(1))]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__sum_state(step_aggr_extended.val_col_1), __count_state(step_aggr_extended.ts)]]_|
|
||||
|_|_Filter: step_aggr_extended.pk_col_1 = Utf8("non_existent")_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | ProjectionExec: expr=[sum(step_aggr_extended.val_col_1)@0 as sum(step_aggr_extended.val_col_1), count(Int64(1))@1 as count(*)]_|
|
||||
|_|_AggregateExec: mode=Final, gby=[], aggr=[sum(step_aggr_extended.val_col_1), count(Int64(1))]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[sum(step_aggr_extended.val_col_1), count(Int64(1))]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
DROP TABLE step_aggr_extended;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
|
||||
@@ -305,3 +305,110 @@ GROUP BY
|
||||
drop table aggr_optimize_not_count;
|
||||
|
||||
drop table aggr_optimize_not;
|
||||
|
||||
--
|
||||
-- Additional test cases for step aggregation pushdown
|
||||
--
|
||||
CREATE TABLE step_aggr_extended (
|
||||
pk_col_1 STRING,
|
||||
pk_col_2 BIGINT,
|
||||
val_col_1 BIGINT,
|
||||
val_col_2 STRING,
|
||||
val_col_3 BIGINT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY(pk_col_1, pk_col_2)
|
||||
) PARTITION ON COLUMNS (pk_col_1) (
|
||||
pk_col_1 < 'f',
|
||||
pk_col_1 >= 'f'
|
||||
);
|
||||
|
||||
INSERT INTO step_aggr_extended VALUES
|
||||
('a', 1, 100, 'v1', 10, 1672531200000),
|
||||
('a', 2, 200, 'v2', NULL, 1672531201000),
|
||||
('g', 1, 300, 'v1', 30, 1672531202000),
|
||||
('g', 2, 400, 'v2', 40, 1672531203000),
|
||||
('a', 3, 100, 'v3', 10, 1672531204000),
|
||||
('g', 3, 300, 'v3', 30, 1672531205000),
|
||||
('h', 4, 500, NULL, 50, 1672531206000);
|
||||
|
||||
|
||||
-- Case 12: GROUP BY includes a mix of partition key and non-partition key.
|
||||
-- `pk_col_1` is a partition key, `pk_col_2` is not.
|
||||
-- This should pushdown entire aggregation to datanodes since it's partitioned by `pk_col_1`.
|
||||
-- Expected: Full pushdown of aggregation to datanodes.
|
||||
SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
-- Case 13: COUNT(DISTINCT) aggregation.
|
||||
-- `DISTINCT` aggregation is more complex and requires a two-phase distinct calculation in a distributed environment. Currently not supported for pushdown.
|
||||
-- Expected: datanode only do table scan, actual aggregation happens on frontend.
|
||||
SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
-- Case 14: Aggregation with a HAVING clause.
|
||||
-- The `HAVING` clause filters results after aggregation.
|
||||
-- Expected: The `HAVING` filter should be applied on the frontend after the final aggregation is complete, not pushed down to datanodes.
|
||||
SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
-- Case 15: Aggregation on a column with NULL values.
|
||||
-- `SUM` should ignore NULLs. `COUNT(val_col_2)` should count non-nulls, `COUNT(*)` should count all rows.
|
||||
-- Expected: Correct aggregation results, proving NULLs are handled properly in a distributed context.
|
||||
SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
-- Case 16: Aggregation on STRING columns.
|
||||
-- `MIN` and `MAX` can operate on strings.
|
||||
-- Expected: Correct lexicographical min/max results.
|
||||
SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
-- Case 17: Aggregation on an empty input set.
|
||||
-- `WHERE` clause filters out all rows.
|
||||
-- Expected: Aggregation should return correct default values (e.g., COUNT is 0, SUM is NULL).
|
||||
SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
DROP TABLE step_aggr_extended;
|
||||
|
||||
@@ -14,13 +14,17 @@ Affected Rows: 0
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
Affected Rows: 5
|
||||
Affected Rows: 9
|
||||
|
||||
-- count
|
||||
SELECT
|
||||
@@ -31,7 +35,7 @@ FROM
|
||||
+-------------------+
|
||||
| count(integers.i) |
|
||||
+-------------------+
|
||||
| 5 |
|
||||
| 9 |
|
||||
+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -120,10 +124,10 @@ ORDER BY
|
||||
+---------------------+-------------------+
|
||||
| ts | count(integers.i) |
|
||||
+---------------------+-------------------+
|
||||
| 2023-01-01T00:00:00 | 2 |
|
||||
| 2023-01-01T00:00:00 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 1 |
|
||||
| 2023-01-01T03:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 |
|
||||
+---------------------+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -234,10 +238,10 @@ ORDER BY
|
||||
+---------------------+-------------------+
|
||||
| time_window | count(integers.i) |
|
||||
+---------------------+-------------------+
|
||||
| 2023-01-01T00:00:00 | 2 |
|
||||
| 2023-01-01T00:00:00 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 1 |
|
||||
| 2023-01-01T03:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 |
|
||||
+---------------------+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -260,15 +264,20 @@ ORDER BY
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|
||||
|_|_Projection: date_bin(Utf8("1 hour"),integers.ts) AS time_window, count(integers.i)_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[count(integers.i)]] |
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("1 hour"),integers.ts)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i)]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[__count_state(integers.i)]]_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_SortExec: expr=[time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] |
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
@@ -299,37 +308,33 @@ ORDER BY
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
@@ -354,10 +359,13 @@ ORDER BY
|
||||
| integers.ts + Int64(1) | integers.i / Int64(2) | count(integers.i) |
|
||||
+------------------------+-----------------------+-------------------+
|
||||
| 1672531200001 | 0 | 1 |
|
||||
| 1672531200001 | 1 | 2 |
|
||||
| 1672531200001 | 2 | 1 |
|
||||
| 1672534800001 | 1 | 1 |
|
||||
| 1672538400001 | 1 | 1 |
|
||||
| 1672538400001 | 2 | 1 |
|
||||
| 1672542000001 | 2 | 1 |
|
||||
| 1672542000001 | 3 | 1 |
|
||||
+------------------------+-----------------------+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -383,15 +391,18 @@ ORDER BY
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|
||||
| logical_plan_| Sort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|
||||
|_|_Aggregate: groupBy=[[integers.ts + Int64(1), integers.i / Int64(2)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i)]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|
||||
|_|_Projection: integers.ts + Int64(1), integers.i / Int64(2), count(integers.i)_|
|
||||
|_|_Aggregate: groupBy=[[CAST(integers.ts AS Int64) + Int64(1), integers.i / Int64(2)]], aggr=[[count(integers.i)]] |
|
||||
|_| Aggregate: groupBy=[[CAST(integers.ts AS Int64) + Int64(1), integers.i / Int64(2)]], aggr=[[__count_state(integers.i)]]_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] |
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
@@ -425,38 +436,36 @@ ORDER BY
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 5_|
|
||||
|_|_| Total rows: 8_|
|
||||
+-+-+-+
|
||||
|
||||
-- test udd/hll_merge pushdown
@@ -487,7 +496,7 @@ GROUP BY
time_window,
host;

Affected Rows: 5
Affected Rows: 9

SELECT
uddsketch_calc(0.5, uddsketch_merge(128, 0.01, udd_state)) as udd_result,
@@ -498,7 +507,7 @@ FROM
+--------------------+------------+
| udd_result | hll_result |
+--------------------+------------+
| 2.9742334234767016 | 5 |
| 2.9742334234767016 | 6 |
+--------------------+------------+

-- SQLNESS REPLACE (-+) -
|
||||

@@ -12,11 +12,15 @@ CREATE TABLE integers(
INSERT INTO
integers (host, i, ts)
VALUES
('220-A', 2, '2023-01-01 00:00:00'),
('220-B', 3, '2023-01-01 00:00:00'),
('550-A', 1, '2023-01-01 00:00:00'),
('550-B', 5, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
('550-Z', 4, '2023-01-01 02:00:00'),
('550-W', 5, '2023-01-01 03:00:00'),
('550-Z', 6, '2023-01-01 03:00:00');

-- count
SELECT

File diff suppressed because one or more lines are too long
@@ -49,9 +49,9 @@ EXPLAIN SELECT * FROM integers i1 WHERE EXISTS(SELECT i FROM integers WHERE i=i1
|
||||
+-+-+
|
||||
| logical_plan_| Sort: i1.i ASC NULLS LAST_|
|
||||
|_|_LeftSemi Join: i1.i = __correlated_sq_1.i_|
|
||||
|_|_SubqueryAlias: i1_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| TableScan: integers_|
|
||||
|_| SubqueryAlias: i1_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
|_|_SubqueryAlias: __correlated_sq_1_|
|
||||
|_|_Projection: integers.i_|
|
||||
@@ -155,3 +155,520 @@ drop table integers;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE integers(i INTEGER, j TIMESTAMP TIME INDEX)
|
||||
PARTITION ON COLUMNS (i) (
|
||||
i < 1000,
|
||||
i >= 1000 AND i < 2000,
|
||||
i >= 2000
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT * FROM integers i1 WHERE EXISTS(SELECT i FROM integers WHERE i=i1.i) ORDER BY i1.i;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: i1.i ASC NULLS LAST_|
|
||||
|_|_LeftSemi Join: i1.i = __correlated_sq_1.i_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| SubqueryAlias: i1_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
|_|_SubqueryAlias: __correlated_sq_1_|
|
||||
|_|_Projection: integers.i_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [i@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[i@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_REDACTED
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_ProjectionExec: expr=[i@0 as i]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT * FROM integers i1 WHERE EXISTS(SELECT count(i) FROM integers WHERE i=i1.i) ORDER BY i1.i;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: i1.i ASC NULLS LAST_|
|
||||
|_|_LeftSemi Join: i1.i = __correlated_sq_1.i_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| SubqueryAlias: i1_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
|_|_SubqueryAlias: __correlated_sq_1_|
|
||||
|_|_Aggregate: groupBy=[[integers.i]], aggr=[[]]_|
|
||||
|_|_Projection: integers.i_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [i@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[i@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_REDACTED
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_AggregateExec: mode=SinglePartitioned, gby=[i@0 as i], aggr=[]_|
|
||||
|_|_ProjectionExec: expr=[i@0 as i]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE t(ts timestamp time index, a INT, b INT)PARTITION ON COLUMNS (a) (
a < 1000,
a >= 1000 AND a < 2000,
a >= 2000
);

Affected Rows: 0

CREATE TABLE t1(ts timestamp time index, a INT)PARTITION ON COLUMNS (a) (
a < 1000,
a >= 1000 AND a < 2000,
a >= 2000
);

Affected Rows: 0

CREATE TABLE t2(ts timestamp time index, a INT)PARTITION ON COLUMNS (a) (
a < 1000,
a >= 1000 AND a < 2000,
a >= 2000
);

Affected Rows: 0

INSERT INTO t(ts,a,b) VALUES (1,3,30),(2,1,10),(3,2,20);

Affected Rows: 3

INSERT INTO t1(ts,a) VALUES (1,1),(2,3);

Affected Rows: 2

INSERT INTO t2(ts,a) VALUES (1,2),(2,3);

Affected Rows: 2

SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x;

+---+
| x |
+---+
| 1 |
| 2 |
| 3 |
+---+

-- expected: 1,2,3
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: sq.x ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: sq.x ASC NULLS LAST_|
|
||||
|_|_Projection: sq.x_|
|
||||
|_|_SubqueryAlias: sq_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [x@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT x, COUNT(*) AS c FROM (SELECT a AS x FROM t) sq GROUP BY x ORDER BY x;

+---+---+
| x | c |
+---+---+
| 1 | 1 |
| 2 | 1 |
| 3 | 1 |
+---+---+

-- expected:
-- x | c
-- 1 | 1
-- 2 | 1
-- 3 | 1
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x, COUNT(*) AS c FROM (SELECT a AS x FROM t) sq GROUP BY x ORDER BY x;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: sq.x ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: sq.x ASC NULLS LAST_|
|
||||
|_|_Projection: sq.x, count(Int64(1)) AS count(*) AS c_|
|
||||
|_|_Aggregate: groupBy=[[sq.x]], aggr=[[count(Int64(1))]]_|
|
||||
|_|_SubqueryAlias: sq_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [x@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT DISTINCT x FROM (SELECT a AS x FROM t) sq ORDER BY x;

+---+
| x |
+---+
| 1 |
| 2 |
| 3 |
+---+

-- expected: 1,2,3
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT DISTINCT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: sq.x ASC NULLS LAST_|
|
||||
|_|_Aggregate: groupBy=[[sq.x]], aggr=[[]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: sq.x_|
|
||||
|_|_SubqueryAlias: sq_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [x@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[x@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_AggregateExec: mode=SinglePartitioned, gby=[x@0 as x], aggr=[]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT sq.x FROM (SELECT a AS x FROM t) sq ORDER BY sq.x;
|
||||
|
||||
+---+
|
||||
| x |
|
||||
+---+
|
||||
| 1 |
|
||||
| 2 |
|
||||
| 3 |
|
||||
+---+
|
||||
|
||||
-- expected: 1,2,3
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT sq.x FROM (SELECT a AS x FROM t) sq ORDER BY sq.x;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: sq.x ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: sq.x ASC NULLS LAST_|
|
||||
|_|_Projection: sq.x_|
|
||||
|_|_SubqueryAlias: sq_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [x@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT y FROM (SELECT x AS y FROM (SELECT a AS x FROM t) sq1) sq2 ORDER BY y;
|
||||
|
||||
+---+
|
||||
| y |
|
||||
+---+
|
||||
| 1 |
|
||||
| 2 |
|
||||
| 3 |
|
||||
+---+
|
||||
|
||||
-- expected: 1,2,3
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT y FROM (SELECT x AS y FROM (SELECT a AS x FROM t) sq1) sq2 ORDER BY y;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: sq2.y ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: sq2.y ASC NULLS LAST_|
|
||||
|_|_Projection: sq2.y_|
|
||||
|_|_SubqueryAlias: sq2_|
|
||||
|_|_Projection: sq1.x AS y_|
|
||||
|_|_SubqueryAlias: sq1_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [y@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT x, x + 1 AS y FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
+---+---+
|
||||
| x | y |
|
||||
+---+---+
|
||||
| 1 | 2 |
|
||||
| 2 | 3 |
|
||||
| 3 | 4 |
|
||||
+---+---+
|
||||
|
||||
-- expected:
|
||||
-- (x,y)
|
||||
-- (1,2)
|
||||
-- (2,3)
|
||||
-- (3,4)
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x, x + 1 AS y FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: sq.x ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: sq.x ASC NULLS LAST_|
|
||||
|_|_Projection: sq.x, CAST(sq.x AS Int64) + Int64(1) AS y_|
|
||||
|_|_SubqueryAlias: sq_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [x@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT a FROM ((SELECT a FROM t1) UNION ALL (SELECT a FROM t2)) u ORDER BY a;
|
||||
|
||||
+---+
|
||||
| a |
|
||||
+---+
|
||||
| 1 |
|
||||
| 2 |
|
||||
| 3 |
|
||||
| 3 |
|
||||
+---+
|
||||
|
||||
-- expected: 1,2,3,3
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT a FROM ((SELECT a FROM t1) UNION ALL (SELECT a FROM t2)) u ORDER BY a;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: u.a ASC NULLS LAST_|
|
||||
|_|_SubqueryAlias: u_|
|
||||
|_|_Union_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: t1.a_|
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: t2.a_|
|
||||
|_|_TableScan: t2_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_InterleaveExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT u1.a
|
||||
FROM (SELECT a FROM t1) u1
|
||||
JOIN (SELECT a FROM t2) u2 ON u1.a = u2.a
|
||||
ORDER BY u1.a;
|
||||
|
||||
+---+
|
||||
| a |
|
||||
+---+
|
||||
| 3 |
|
||||
+---+
|
||||
|
||||
-- expected: 3
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT u1.a
|
||||
FROM (SELECT a FROM t1) u1
|
||||
JOIN (SELECT a FROM t2) u2 ON u1.a = u2.a
|
||||
ORDER BY u1.a;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: u1.a ASC NULLS LAST_|
|
||||
|_|_Projection: u1.a_|
|
||||
|_|_Inner Join: u1.a = u2.a_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| SubqueryAlias: u1_|
|
||||
|_|_Projection: t1.a_|
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| SubqueryAlias: u2_|
|
||||
|_|_Projection: t2.a_|
|
||||
|_|_TableScan: t2_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [a@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_REDACTED
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT x FROM (VALUES (2),(1)) v(x) ORDER BY x;

+---+
| x |
+---+
| 1 |
| 2 |
+---+

-- expected: 1,2
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x FROM (VALUES (2),(1)) v(x) ORDER BY x;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: v.x ASC NULLS LAST_|
|
||||
|_|_SubqueryAlias: v_|
|
||||
|_|_Projection: column1 AS x_|
|
||||
|_|_Values: (Int64(2)), (Int64(1))_|
|
||||
| physical_plan | SortExec: expr=[x@0 ASC NULLS LAST], preserve_partitioning=[false] |
|
||||
|_|_ProjectionExec: expr=[column1@0 as x]_|
|
||||
|_|_DataSourceExec: partitions=1, partition_sizes=[1]_|
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x LIMIT 2;

+---+
| x |
+---+
| 1 |
| 2 |
+---+

-- expected: 1,2
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x LIMIT 2;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Limit: skip=0, fetch=2_|
|
||||
|_|_MergeSort: sq.x ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Limit: skip=0, fetch=2_|
|
||||
|_|_Sort: sq.x ASC NULLS LAST_|
|
||||
|_|_Projection: sq.x_|
|
||||
|_|_SubqueryAlias: sq_|
|
||||
|_|_Projection: t.a AS x_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [x@0 ASC NULLS LAST], fetch=2_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_SortExec: TopK(fetch=2), expr=[x@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
DROP TABLE t;

Affected Rows: 0

DROP TABLE t1;

Affected Rows: 0

DROP TABLE t2;

Affected Rows: 0

@@ -37,3 +37,168 @@ EXPLAIN INSERT INTO other SELECT i, 2 FROM integers WHERE i=(SELECT MAX(i) FROM
|
||||
drop table other;
|
||||
|
||||
drop table integers;
|
||||
|
||||
CREATE TABLE integers(i INTEGER, j TIMESTAMP TIME INDEX)
|
||||
PARTITION ON COLUMNS (i) (
|
||||
i < 1000,
|
||||
i >= 1000 AND i < 2000,
|
||||
i >= 2000
|
||||
);
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT * FROM integers i1 WHERE EXISTS(SELECT i FROM integers WHERE i=i1.i) ORDER BY i1.i;
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT * FROM integers i1 WHERE EXISTS(SELECT count(i) FROM integers WHERE i=i1.i) ORDER BY i1.i;
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
CREATE TABLE t(ts timestamp time index, a INT, b INT)PARTITION ON COLUMNS (a) (
|
||||
a < 1000,
|
||||
a >= 1000 AND a < 2000,
|
||||
a >= 2000
|
||||
);
|
||||
|
||||
CREATE TABLE t1(ts timestamp time index, a INT)PARTITION ON COLUMNS (a) (
|
||||
a < 1000,
|
||||
a >= 1000 AND a < 2000,
|
||||
a >= 2000
|
||||
);
|
||||
|
||||
CREATE TABLE t2(ts timestamp time index, a INT)PARTITION ON COLUMNS (a) (
|
||||
a < 1000,
|
||||
a >= 1000 AND a < 2000,
|
||||
a >= 2000
|
||||
);
|
||||
|
||||
INSERT INTO t(ts,a,b) VALUES (1,3,30),(2,1,10),(3,2,20);
|
||||
|
||||
INSERT INTO t1(ts,a) VALUES (1,1),(2,3);
|
||||
|
||||
INSERT INTO t2(ts,a) VALUES (1,2),(2,3);
|
||||
|
||||
SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
-- expected: 1,2,3
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
SELECT x, COUNT(*) AS c FROM (SELECT a AS x FROM t) sq GROUP BY x ORDER BY x;
|
||||
-- expected:
|
||||
-- x | c
|
||||
-- 1 | 1
|
||||
-- 2 | 1
|
||||
-- 3 | 1
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x, COUNT(*) AS c FROM (SELECT a AS x FROM t) sq GROUP BY x ORDER BY x;
|
||||
|
||||
SELECT DISTINCT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
-- expected: 1,2,3
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT DISTINCT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
SELECT sq.x FROM (SELECT a AS x FROM t) sq ORDER BY sq.x;
|
||||
-- expected: 1,2,3
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT sq.x FROM (SELECT a AS x FROM t) sq ORDER BY sq.x;
|
||||
|
||||
SELECT y FROM (SELECT x AS y FROM (SELECT a AS x FROM t) sq1) sq2 ORDER BY y;
|
||||
-- expected: 1,2,3
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT y FROM (SELECT x AS y FROM (SELECT a AS x FROM t) sq1) sq2 ORDER BY y;
|
||||
|
||||
SELECT x, x + 1 AS y FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
-- expected:
|
||||
-- (x,y)
|
||||
-- (1,2)
|
||||
-- (2,3)
|
||||
-- (3,4)
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x, x + 1 AS y FROM (SELECT a AS x FROM t) sq ORDER BY x;
|
||||
|
||||
SELECT a FROM ((SELECT a FROM t1) UNION ALL (SELECT a FROM t2)) u ORDER BY a;
|
||||
-- expected: 1,2,3,3
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT a FROM ((SELECT a FROM t1) UNION ALL (SELECT a FROM t2)) u ORDER BY a;
|
||||
|
||||
SELECT u1.a
|
||||
FROM (SELECT a FROM t1) u1
|
||||
JOIN (SELECT a FROM t2) u2 ON u1.a = u2.a
|
||||
ORDER BY u1.a;
|
||||
-- expected: 3
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT u1.a
|
||||
FROM (SELECT a FROM t1) u1
|
||||
JOIN (SELECT a FROM t2) u2 ON u1.a = u2.a
|
||||
ORDER BY u1.a;
|
||||
|
||||
SELECT x FROM (VALUES (2),(1)) v(x) ORDER BY x;
|
||||
-- expected: 1,2
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x FROM (VALUES (2),(1)) v(x) ORDER BY x;
|
||||
|
||||
SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x LIMIT 2;
|
||||
-- expected: 1,2
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN SELECT x FROM (SELECT a AS x FROM t) sq ORDER BY x LIMIT 2;
|
||||
|
||||
DROP TABLE t;
DROP TABLE t1;
DROP TABLE t2;

827
tests/cases/distributed/optimizer/first_value_advance.result
Normal file
@@ -0,0 +1,827 @@
create table t (
ts timestamp time index,
host string primary key,
not_pk string,
val double,
) with (append_mode='true');

Affected Rows: 0

insert into t values
(0, 'a', '🌕', 1.0),
(1, 'b', '🌖', 2.0),
(2, 'a', '🌗', 3.0),
(3, 'c', '🌘', 4.0),
(4, 'a', '🌑', 5.0),
(5, 'b', '🌒', 6.0),
(6, 'a', '🌓', 7.0),
(7, 'c', '🌔', 8.0),
(8, 'd', '🌕', 9.0);

Affected Rows: 9

admin flush_table('t');

+------------------------+
| ADMIN flush_table('t') |
+------------------------+
| 0 |
+------------------------+

select
|
||||
first_value(host order by ts),
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by host;
|
||||
|
||||
+----------------------------------------------------+------------------------------------------------------+---------------------------------------------------+
|
||||
| first_value(t.host) ORDER BY [t.ts ASC NULLS LAST] | first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST] | first_value(t.val) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+----------------------------------------------------+------------------------------------------------------+---------------------------------------------------+
|
||||
| a | 🌕 | 1.0 |
|
||||
| b | 🌖 | 2.0 |
|
||||
| c | 🌘 | 4.0 |
|
||||
| d | 🌕 | 9.0 |
|
||||
+----------------------------------------------------+------------------------------------------------------+---------------------------------------------------+
|
||||
|
||||
-- repeat the query again, ref: https://github.com/GreptimeTeam/greptimedb/issues/4650
|
||||
select
|
||||
first_value(host order by ts),
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by host;
|
||||
|
||||
+----------------------------------------------------+------------------------------------------------------+---------------------------------------------------+
|
||||
| first_value(t.host) ORDER BY [t.ts ASC NULLS LAST] | first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST] | first_value(t.val) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+----------------------------------------------------+------------------------------------------------------+---------------------------------------------------+
|
||||
| a | 🌕 | 1.0 |
|
||||
| b | 🌖 | 2.0 |
|
||||
| c | 🌘 | 4.0 |
|
||||
| d | 🌕 | 9.0 |
|
||||
+----------------------------------------------------+------------------------------------------------------+---------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select
|
||||
first_value(host order by ts),
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[t.host]], aggr=[[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
explain analyze
|
||||
select
|
||||
first_value(host order by ts),
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1} REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
select first_value(ts order by ts) from t;

+--------------------------------------------------+
| first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST] |
+--------------------------------------------------+
| 1970-01-01T00:00:00 |
+--------------------------------------------------+

-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select first_value(ts order by ts) from t;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
explain analyze
|
||||
select first_value(ts order by ts) from t;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1} REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 1_|
|
||||
+-+-+-+
|
||||
|
||||
drop table t;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
create table t (
|
||||
ts timestamp time index,
|
||||
host string primary key,
|
||||
not_pk string,
|
||||
val double,
|
||||
)
|
||||
PARTITION ON COLUMNS (host) (
|
||||
host < 'b',
|
||||
host >= 'b' AND host < 'd',
|
||||
host >= 'd'
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into t values
|
||||
(0, 'a', '🌕', 1.0),
|
||||
(1, 'b', '🌖', 2.0),
|
||||
(2, 'a', '🌗', 3.0),
|
||||
(3, 'c', '🌘', 4.0),
|
||||
(4, 'a', '🌑', 5.0),
|
||||
(5, 'b', '🌒', 6.0),
|
||||
(6, 'a', '🌓', 7.0),
|
||||
(7, 'c', '🌔', 8.0),
|
||||
(8, 'd', '🌕', 9.0);
|
||||
|
||||
Affected Rows: 9
|
||||
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+--------------+------------------------------------------------------+---------------------------------------------------+
|
||||
| ordered_host | first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST] | first_value(t.val) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+--------------+------------------------------------------------------+---------------------------------------------------+
|
||||
| a | 🌕 | 1.0 |
|
||||
| b | 🌖 | 2.0 |
|
||||
| c | 🌘 | 4.0 |
|
||||
| d | 🌕 | 9.0 |
|
||||
+--------------+------------------------------------------------------+---------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: ordered_host ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: first_value(t.host) ORDER BY [t.ts ASC NULLS LAST] AS ordered_host, first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[t.host]], aggr=[[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(not_pk order by ts),
|
||||
first_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 as ordered_host, first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 as ordered_host, first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 as ordered_host, first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
select first_value(ts order by ts) from t;

+--------------------------------------------------+
| first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST] |
+--------------------------------------------------+
| 1970-01-01T00:00:00 |
+--------------------------------------------------+

-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select first_value(ts order by ts) from t;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__first_value_merge(__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]) AS first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select first_value(ts order by ts) from t;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=Final, gby=[], aggr=[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 1_|
|
||||
+-+-+-+
|
||||
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts),
|
||||
first_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+--------------+---------------------------------------------------+--------------------------------------------------+-------------------------+
|
||||
| ordered_host | first_value(t.val) ORDER BY [t.ts ASC NULLS LAST] | first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST] | time_window |
|
||||
+--------------+---------------------------------------------------+--------------------------------------------------+-------------------------+
|
||||
| a | 1.0 | 1970-01-01T00:00:00 | 1970-01-01T00:00:00 |
|
||||
| b | 6.0 | 1970-01-01T00:00:00.005 | 1970-01-01T00:00:00.005 |
|
||||
+--------------+---------------------------------------------------+--------------------------------------------------+-------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts),
|
||||
first_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST, ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: first_value(t.host) ORDER BY [t.ts ASC NULLS LAST] AS ordered_host, first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t.ts) AS time_window_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("5 milliseconds"),t.ts)]], aggr=[[__first_value_merge(__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST]) AS first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_merge(__first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST]) AS first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_merge(__first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]) AS first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("5 milliseconds") AS Interval(MonthDayNano)), t.ts)]], aggr=[[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as ordered_host, first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]@3 as first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t.ts)@0 as time_window]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts),
|
||||
first_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as ordered_host, first_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]@3 as first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t.ts)@0 as time_window] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[first_value(t.host) ORDER BY [t.ts ASC NULLS LAST], first_value(t.val) ORDER BY [t.ts ASC NULLS LAST], first_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@0) as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@0) as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@0) as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__first_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __first_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 2_|
|
||||
+-+-+-+
|
||||
|
||||
drop table t;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE phy (ts timestamp time index, val double, host string primary key)
|
||||
PARTITION ON COLUMNS (host) (
|
||||
host < 'b',
|
||||
host >= 'b' AND host < 'd',
|
||||
host >= 'd'
|
||||
) engine=metric with ("physical_metric_table" = "");
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy");
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into
|
||||
t1(ts, val, host)
|
||||
values
|
||||
(0, 1.0, 'a'),
|
||||
(1, 2.0, 'b'),
|
||||
(2, 3.0, 'a'),
|
||||
(3, 4.0, 'c'),
|
||||
(4, 5.0, 'a'),
|
||||
(5, 6.0, 'b'),
|
||||
(6, 7.0, 'a'),
|
||||
(7, 8.0, 'c'),
|
||||
(8, 9.0, 'd');
|
||||
|
||||
Affected Rows: 9
|
||||
|
||||
select first_value(ts order by ts) from t1;
|
||||
|
||||
+----------------------------------------------------+
|
||||
| first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST] |
|
||||
+----------------------------------------------------+
|
||||
| 1970-01-01T00:00:00 |
|
||||
+----------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select first_value(ts order by ts) from t1;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__first_value_merge(__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]) AS first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select first_value(ts order by ts) from t1;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=Final, gby=[], aggr=[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 1_|
|
||||
+-+-+-+
|
||||
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts)
|
||||
from t1
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+--------------+-----------------------------------------------------+
|
||||
| ordered_host | first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST] |
|
||||
+--------------+-----------------------------------------------------+
|
||||
| a | 1.0 |
|
||||
| b | 2.0 |
|
||||
| c | 4.0 |
|
||||
| d | 9.0 |
|
||||
+--------------+-----------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts)
|
||||
from t1
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: ordered_host ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST] AS ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[t1.host]], aggr=[[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts)
|
||||
from t1
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 as ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@1 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@0 as host], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 as ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@1 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@0 as host], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 as ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@1 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@0 as host], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts),
|
||||
first_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t1
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+--------------+-----------------------------------------------------+----------------------------------------------------+-------------------------+
|
||||
| ordered_host | first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST] | first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST] | time_window |
|
||||
+--------------+-----------------------------------------------------+----------------------------------------------------+-------------------------+
|
||||
| a | 1.0 | 1970-01-01T00:00:00 | 1970-01-01T00:00:00 |
|
||||
| b | 6.0 | 1970-01-01T00:00:00.005 | 1970-01-01T00:00:00.005 |
|
||||
+--------------+-----------------------------------------------------+----------------------------------------------------+-------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts),
|
||||
first_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t1
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST, ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST] AS ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t1.ts) AS time_window_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("5 milliseconds"),t1.ts)]], aggr=[[__first_value_merge(__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST]) AS first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_merge(__first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST]) AS first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_merge(__first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]) AS first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("5 milliseconds") AS Interval(MonthDayNano)), t1.ts)]], aggr=[[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]@3 as first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t1.ts)@0 as time_window]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
first_value(host order by ts) as ordered_host,
|
||||
first_value(val order by ts),
|
||||
first_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t1
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as ordered_host, first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]@3 as first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t1.ts)@0 as time_window] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[first_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], first_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@1) as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@1) as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@1) as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__first_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __first_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 2_|
|
||||
+-+-+-+
|
||||
|
||||
drop table t1;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
drop table phy;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
1
tests/cases/distributed/optimizer/first_value_advance.sql
Symbolic link
@@ -0,0 +1 @@
../../standalone/optimizer/first_value_advance.sql
827
tests/cases/distributed/optimizer/last_value_advance.result
Normal file
@@ -0,0 +1,827 @@
|
||||
create table t (
|
||||
ts timestamp time index,
|
||||
host string primary key,
|
||||
not_pk string,
|
||||
val double,
|
||||
) with (append_mode='true');
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into t values
|
||||
(0, 'a', '🌕', 1.0),
|
||||
(1, 'b', '🌖', 2.0),
|
||||
(2, 'a', '🌗', 3.0),
|
||||
(3, 'c', '🌘', 4.0),
|
||||
(4, 'a', '🌑', 5.0),
|
||||
(5, 'b', '🌒', 6.0),
|
||||
(6, 'a', '🌓', 7.0),
|
||||
(7, 'c', '🌔', 8.0),
|
||||
(8, 'd', '🌕', 9.0);
|
||||
|
||||
Affected Rows: 9
|
||||
|
||||
admin flush_table('t');
|
||||
|
||||
+------------------------+
|
||||
| ADMIN flush_table('t') |
|
||||
+------------------------+
|
||||
| 0 |
|
||||
+------------------------+
|
||||
|
||||
select
|
||||
last_value(host order by ts),
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by host;
|
||||
|
||||
+---------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
| last_value(t.host) ORDER BY [t.ts ASC NULLS LAST] | last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST] | last_value(t.val) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+---------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
| a | 🌓 | 7.0 |
|
||||
| b | 🌒 | 6.0 |
|
||||
| c | 🌔 | 8.0 |
|
||||
| d | 🌕 | 9.0 |
|
||||
+---------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
|
||||
-- repeat the query again, ref: https://github.com/GreptimeTeam/greptimedb/issues/4650
|
||||
select
|
||||
last_value(host order by ts),
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by host;
|
||||
|
||||
+---------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
| last_value(t.host) ORDER BY [t.ts ASC NULLS LAST] | last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST] | last_value(t.val) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+---------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
| a | 🌓 | 7.0 |
|
||||
| b | 🌒 | 6.0 |
|
||||
| c | 🌔 | 8.0 |
|
||||
| d | 🌕 | 9.0 |
|
||||
+---------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select
|
||||
last_value(host order by ts),
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[t.host]], aggr=[[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
explain analyze
|
||||
select
|
||||
last_value(host order by ts),
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1}, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
select last_value(ts order by ts) from t;
|
||||
|
||||
+-------------------------------------------------+
|
||||
| last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+-------------------------------------------------+
|
||||
| 1970-01-01T00:00:00.008 |
|
||||
+-------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select last_value(ts order by ts) from t;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
explain analyze
|
||||
select last_value(ts order by ts) from t;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":0, "files":1, "file_ranges":1}, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 1_|
|
||||
+-+-+-+
|
||||
|
||||
drop table t;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
create table t (
|
||||
ts timestamp time index,
|
||||
host string primary key,
|
||||
not_pk string,
|
||||
val double,
|
||||
)
|
||||
PARTITION ON COLUMNS (host) (
|
||||
host < 'b',
|
||||
host >= 'b' AND host < 'd',
|
||||
host >= 'd'
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into t values
|
||||
(0, 'a', '🌕', 1.0),
|
||||
(1, 'b', '🌖', 2.0),
|
||||
(2, 'a', '🌗', 3.0),
|
||||
(3, 'c', '🌘', 4.0),
|
||||
(4, 'a', '🌑', 5.0),
|
||||
(5, 'b', '🌒', 6.0),
|
||||
(6, 'a', '🌓', 7.0),
|
||||
(7, 'c', '🌔', 8.0),
|
||||
(8, 'd', '🌕', 9.0);
|
||||
|
||||
Affected Rows: 9
|
||||
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+--------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
| ordered_host | last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST] | last_value(t.val) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+--------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
| a | 🌓 | 7.0 |
|
||||
| b | 🌒 | 6.0 |
|
||||
| c | 🌔 | 8.0 |
|
||||
| d | 🌕 | 9.0 |
|
||||
+--------------+-----------------------------------------------------+--------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: ordered_host ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: last_value(t.host) ORDER BY [t.ts ASC NULLS LAST] AS ordered_host, last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[t.host]], aggr=[[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(not_pk order by ts),
|
||||
last_value(val order by ts)
|
||||
from t
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 as ordered_host, last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 as ordered_host, last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 as ordered_host, last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@3 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
select last_value(ts order by ts) from t;
|
||||
|
||||
+-------------------------------------------------+
|
||||
| last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST] |
|
||||
+-------------------------------------------------+
|
||||
| 1970-01-01T00:00:00.008 |
|
||||
+-------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain select last_value(ts order by ts) from t;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__last_value_merge(__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]) AS last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select last_value(ts order by ts) from t;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=Final, gby=[], aggr=[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 1_|
|
||||
+-+-+-+
|
||||
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts),
|
||||
last_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+--------------+--------------------------------------------------+-------------------------------------------------+-------------------------+
|
||||
| ordered_host | last_value(t.val) ORDER BY [t.ts ASC NULLS LAST] | last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST] | time_window |
|
||||
+--------------+--------------------------------------------------+-------------------------------------------------+-------------------------+
|
||||
| a | 5.0 | 1970-01-01T00:00:00.004 | 1970-01-01T00:00:00 |
|
||||
| d | 9.0 | 1970-01-01T00:00:00.008 | 1970-01-01T00:00:00.005 |
|
||||
+--------------+--------------------------------------------------+-------------------------------------------------+-------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts),
|
||||
last_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST, ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: last_value(t.host) ORDER BY [t.ts ASC NULLS LAST] AS ordered_host, last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t.ts) AS time_window_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("5 milliseconds"),t.ts)]], aggr=[[__last_value_merge(__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST]) AS last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_merge(__last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST]) AS last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_merge(__last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]) AS last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("5 milliseconds") AS Interval(MonthDayNano)), t.ts)]], aggr=[[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as ordered_host, last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]@3 as last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t.ts)@0 as time_window]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts),
|
||||
last_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST]@1 as ordered_host, last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]@2 as last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]@3 as last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t.ts)@0 as time_window] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST], last_value(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@0) as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@0) as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t.ts)@0 as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@0) as date_bin(Utf8("5 milliseconds"),t.ts)], aggr=[__last_value_state(t.host) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.val) ORDER BY [t.ts ASC NULLS LAST], __last_value_state(t.ts) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 2_|
|
||||
+-+-+-+
|
||||
|
||||
drop table t;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE phy (ts timestamp time index, val double, host string primary key)
|
||||
PARTITION ON COLUMNS (host) (
|
||||
host < 'b',
|
||||
host >= 'b' AND host < 'd',
|
||||
host >= 'd'
|
||||
) engine=metric with ("physical_metric_table" = "");
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy");
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into
|
||||
t1(ts, val, host)
|
||||
values
|
||||
(0, 1.0, 'a'),
|
||||
(1, 2.0, 'b'),
|
||||
(2, 3.0, 'a'),
|
||||
(3, 4.0, 'c'),
|
||||
(4, 5.0, 'a'),
|
||||
(5, 6.0, 'b'),
|
||||
(6, 7.0, 'a'),
|
||||
(7, 8.0, 'c'),
|
||||
(8, 9.0, 'd');
|
||||
|
||||
Affected Rows: 9
|
||||
|
||||
select last_value(ts order by ts) from t1;
|
||||
|
||||
+---------------------------------------------------+
|
||||
| last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST] |
|
||||
+---------------------------------------------------+
|
||||
| 1970-01-01T00:00:00.008 |
|
||||
+---------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select last_value(ts order by ts) from t1;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__last_value_merge(__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]) AS last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select last_value(ts order by ts) from t1;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=Final, gby=[], aggr=[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 1_|
|
||||
+-+-+-+
|
||||
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts)
|
||||
from t1
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+--------------+----------------------------------------------------+
|
||||
| ordered_host | last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST] |
|
||||
+--------------+----------------------------------------------------+
|
||||
| a | 7.0 |
|
||||
| b | 6.0 |
|
||||
| c | 8.0 |
|
||||
| d | 9.0 |
|
||||
+--------------+----------------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts)
|
||||
from t1
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: ordered_host ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST] AS ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]_|
|
||||
|_|_Aggregate: groupBy=[[t1.host]], aggr=[[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]]] |
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts)
|
||||
from t1
|
||||
group by host
|
||||
order by ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 as ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@1 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@0 as host], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 as ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@1 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@0 as host], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 as ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@1 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[host@0 as host], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED, "selector":"LastRow" REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts),
|
||||
last_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t1
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+--------------+----------------------------------------------------+---------------------------------------------------+-------------------------+
|
||||
| ordered_host | last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST] | last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST] | time_window |
|
||||
+--------------+----------------------------------------------------+---------------------------------------------------+-------------------------+
|
||||
| a | 5.0 | 1970-01-01T00:00:00.004 | 1970-01-01T00:00:00 |
|
||||
| d | 9.0 | 1970-01-01T00:00:00.008 | 1970-01-01T00:00:00.005 |
|
||||
+--------------+----------------------------------------------------+---------------------------------------------------+-------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
explain
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts),
|
||||
last_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t1
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST, ordered_host ASC NULLS LAST_|
|
||||
|_|_Projection: last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST] AS ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t1.ts) AS time_window_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("5 milliseconds"),t1.ts)]], aggr=[[__last_value_merge(__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST]) AS last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_merge(__last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST]) AS last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_merge(__last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]) AS last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("5 milliseconds") AS Interval(MonthDayNano)), t1.ts)]], aggr=[[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]]_|
|
||||
|_|_TableScan: t1_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]@3 as last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t1.ts)@0 as time_window]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (elapsed_compute.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (partitioning.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
explain analyze
|
||||
select
|
||||
last_value(host order by ts) as ordered_host,
|
||||
last_value(val order by ts),
|
||||
last_value(ts order by ts),
|
||||
date_bin('5ms'::INTERVAL, ts) as time_window
|
||||
from t1
|
||||
group by time_window
|
||||
order by time_window, ordered_host;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@3 ASC NULLS LAST, ordered_host@0 ASC NULLS LAST], preserve_REDACTED
|
||||
|_|_|_ProjectionExec: expr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST]@1 as ordered_host, last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST]@2 as last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]@3 as last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST], date_bin(Utf8("5 milliseconds"),t1.ts)@0 as time_window] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[last_value(t1.host) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.val) ORDER BY [t1.ts ASC NULLS LAST], last_value(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@1) as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@1) as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("5 milliseconds"),t1.ts)@0 as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 5000000 }, ts@1) as date_bin(Utf8("5 milliseconds"),t1.ts)], aggr=[__last_value_state(t1.host) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.val) ORDER BY [t1.ts ASC NULLS LAST], __last_value_state(t1.ts) ORDER BY [t1.ts ASC NULLS LAST]] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 2_|
|
||||
+-+-+-+
|
||||
|
||||
drop table t1;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
drop table phy;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
1
tests/cases/distributed/optimizer/last_value_advance.sql
Symbolic link
@@ -0,0 +1 @@
|
||||
../../standalone/optimizer/last_value_advance.sql
|
||||
94
tests/cases/standalone/common/cast/boolean_cast.result
Normal file
@@ -0,0 +1,94 @@
|
||||
-- Migrated from DuckDB test: test/sql/cast/boolean_autocast.test
|
||||
-- Description: Test boolean casts
|
||||
-- Note: GreptimeDB doesn't support automatic boolean-integer comparisons
|
||||
-- Test explicit boolean casts (supported)
|
||||
SELECT 1::BOOLEAN;
|
||||
|
||||
+----------+
|
||||
| Int64(1) |
|
||||
+----------+
|
||||
| true |
|
||||
+----------+
|
||||
|
||||
SELECT 0::BOOLEAN;
|
||||
|
||||
+----------+
|
||||
| Int64(0) |
|
||||
+----------+
|
||||
| false |
|
||||
+----------+
|
||||
|
||||
SELECT 'true'::BOOLEAN;
|
||||
|
||||
+--------------+
|
||||
| Utf8("true") |
|
||||
+--------------+
|
||||
| true |
|
||||
+--------------+
|
||||
|
||||
SELECT 'false'::BOOLEAN;
|
||||
|
||||
+---------------+
|
||||
| Utf8("false") |
|
||||
+---------------+
|
||||
| false |
|
||||
+---------------+
|
||||
|
||||
-- Test boolean operations
|
||||
SELECT true AND false;
|
||||
|
||||
+----------------------------------+
|
||||
| Boolean(true) AND Boolean(false) |
|
||||
+----------------------------------+
|
||||
| false |
|
||||
+----------------------------------+
|
||||
|
||||
SELECT true OR false;
|
||||
|
||||
+---------------------------------+
|
||||
| Boolean(true) OR Boolean(false) |
|
||||
+---------------------------------+
|
||||
| true |
|
||||
+---------------------------------+
|
||||
|
||||
SELECT NOT true;
|
||||
|
||||
+-------------------+
|
||||
| NOT Boolean(true) |
|
||||
+-------------------+
|
||||
| false |
|
||||
+-------------------+
|
||||
|
||||
SELECT NOT false;
|
||||
|
||||
+--------------------+
|
||||
| NOT Boolean(false) |
|
||||
+--------------------+
|
||||
| true |
|
||||
+--------------------+
|
||||
|
||||
-- Test boolean comparisons (same type)
|
||||
SELECT true = true;
|
||||
|
||||
+-------------------------------+
|
||||
| Boolean(true) = Boolean(true) |
|
||||
+-------------------------------+
|
||||
| true |
|
||||
+-------------------------------+
|
||||
|
||||
SELECT true = false;
|
||||
|
||||
+--------------------------------+
|
||||
| Boolean(true) = Boolean(false) |
|
||||
+--------------------------------+
|
||||
| false |
|
||||
+--------------------------------+
|
||||
|
||||
SELECT false = false;
|
||||
|
||||
+---------------------------------+
|
||||
| Boolean(false) = Boolean(false) |
|
||||
+---------------------------------+
|
||||
| true |
|
||||
+---------------------------------+
|
||||
|
||||
28
tests/cases/standalone/common/cast/boolean_cast.sql
Normal file
@@ -0,0 +1,28 @@
|
||||
-- Migrated from DuckDB test: test/sql/cast/boolean_autocast.test
|
||||
-- Description: Test boolean casts
|
||||
-- Note: GreptimeDB doesn't support automatic boolean-integer comparisons
|
||||
|
||||
-- Test explicit boolean casts (supported)
|
||||
SELECT 1::BOOLEAN;
|
||||
|
||||
SELECT 0::BOOLEAN;
|
||||
|
||||
SELECT 'true'::BOOLEAN;
|
||||
|
||||
SELECT 'false'::BOOLEAN;
|
||||
|
||||
-- Test boolean operations
|
||||
SELECT true AND false;
|
||||
|
||||
SELECT true OR false;
|
||||
|
||||
SELECT NOT true;
|
||||
|
||||
SELECT NOT false;
|
||||
|
||||
-- Test boolean comparisons (same type)
|
||||
SELECT true = true;
|
||||
|
||||
SELECT true = false;
|
||||
|
||||
SELECT false = false;
|
||||
95
tests/cases/standalone/common/cast/string_to_integer.result
Normal file
@@ -0,0 +1,95 @@
|
||||
-- Migrated from DuckDB test: test/sql/cast/string_to_integer_decimal_cast.test
|
||||
-- Description: String to number casts
|
||||
-- Note: GreptimeDB doesn't support decimal string to integer conversion
|
||||
-- Valid integer string conversions
|
||||
SELECT '0'::INT;
|
||||
|
||||
+-----------+
|
||||
| Utf8("0") |
|
||||
+-----------+
|
||||
| 0 |
|
||||
+-----------+
|
||||
|
||||
SELECT '1'::INT;
|
||||
|
||||
+-----------+
|
||||
| Utf8("1") |
|
||||
+-----------+
|
||||
| 1 |
|
||||
+-----------+
|
||||
|
||||
SELECT '1000000'::INT;
|
||||
|
||||
+-----------------+
|
||||
| Utf8("1000000") |
|
||||
+-----------------+
|
||||
| 1000000 |
|
||||
+-----------------+
|
||||
|
||||
SELECT '-1'::INT;
|
||||
|
||||
+------------+
|
||||
| Utf8("-1") |
|
||||
+------------+
|
||||
| -1 |
|
||||
+------------+
|
||||
|
||||
SELECT '-1000'::INT;
|
||||
|
||||
+---------------+
|
||||
| Utf8("-1000") |
|
||||
+---------------+
|
||||
| -1000 |
|
||||
+---------------+
|
||||
|
||||
-- Test with BIGINT
|
||||
SELECT '0'::BIGINT;
|
||||
|
||||
+-----------+
|
||||
| Utf8("0") |
|
||||
+-----------+
|
||||
| 0 |
|
||||
+-----------+
|
||||
|
||||
SELECT '1000000'::BIGINT;
|
||||
|
||||
+-----------------+
|
||||
| Utf8("1000000") |
|
||||
+-----------------+
|
||||
| 1000000 |
|
||||
+-----------------+
|
||||
|
||||
-- Convert decimal strings to DOUBLE first, then to INT if needed
|
||||
SELECT '0.5'::DOUBLE;
|
||||
|
||||
+-------------+
|
||||
| Utf8("0.5") |
|
||||
+-------------+
|
||||
| 0.5 |
|
||||
+-------------+
|
||||
|
||||
SELECT '1.50004'::DOUBLE;
|
||||
|
||||
+-----------------+
|
||||
| Utf8("1.50004") |
|
||||
+-----------------+
|
||||
| 1.50004 |
|
||||
+-----------------+
|
||||
|
||||
SELECT '-0.5'::DOUBLE;
|
||||
|
||||
+--------------+
|
||||
| Utf8("-0.5") |
|
||||
+--------------+
|
||||
| -0.5 |
|
||||
+--------------+
|
||||
|
||||
-- Test invalid cases (should error)
|
||||
SELECT '0.5'::INT;
|
||||
|
||||
Error: 3001(EngineExecuteQuery), Cast error: Cannot cast string '0.5' to value of Int32 type
|
||||
|
||||
SELECT 'abc'::INT;
|
||||
|
||||
Error: 3001(EngineExecuteQuery), Cast error: Cannot cast string 'abc' to value of Int32 type
|
||||
|
||||
31
tests/cases/standalone/common/cast/string_to_integer.sql
Normal file
@@ -0,0 +1,31 @@
|
||||
-- Migrated from DuckDB test: test/sql/cast/string_to_integer_decimal_cast.test
|
||||
-- Description: String to number casts
|
||||
-- Note: GreptimeDB doesn't support decimal string to integer conversion
|
||||
|
||||
-- Valid integer string conversions
|
||||
SELECT '0'::INT;
|
||||
|
||||
SELECT '1'::INT;
|
||||
|
||||
SELECT '1000000'::INT;
|
||||
|
||||
SELECT '-1'::INT;
|
||||
|
||||
SELECT '-1000'::INT;
|
||||
|
||||
-- Test with BIGINT
|
||||
SELECT '0'::BIGINT;
|
||||
|
||||
SELECT '1000000'::BIGINT;
|
||||
|
||||
-- Convert decimal strings to DOUBLE first, then to INT if needed
|
||||
SELECT '0.5'::DOUBLE;
|
||||
|
||||
SELECT '1.50004'::DOUBLE;
|
||||
|
||||
SELECT '-0.5'::DOUBLE;
|
||||
|
||||
-- Test invalid cases (should error)
|
||||
SELECT '0.5'::INT;
|
||||
|
||||
SELECT 'abc'::INT;
|
||||
@@ -386,6 +386,35 @@ select ts, count(*) from logical_table_4 GROUP BY ts ORDER BY ts;
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
select * from logical_table_4;
|
||||
|
||||
+-----------------------+-----+------+-------------------+---------------------+
|
||||
| another_partition_key | cpu | host | one_partition_key | ts |
|
||||
+-----------------------+-----+------+-------------------+---------------------+
|
||||
| | 1.0 | | | 2023-01-01T00:00:00 |
|
||||
| | 2.0 | | | 2023-01-01T00:00:01 |
|
||||
| | 3.0 | | | 2023-01-01T00:00:02 |
|
||||
+-----------------------+-----+------+-------------------+---------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RepartitionExec:.*) RepartitionExec: REDACTED
|
||||
EXPLAIN select * from logical_table_4;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Projection: logical_table_4.another_partition_key, logical_table_4.cpu, logical_table_4.host, logical_table_4.one_partition_key, logical_table_4.ts |
|
||||
|_|_TableScan: logical_table_4_|
|
||||
|_| ]]_|
|
||||
| physical_plan | CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
drop table logical_table_2;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -140,6 +140,15 @@ select ts, count(*) from logical_table_4 GROUP BY ts ORDER BY ts;
|
||||
EXPLAIN
|
||||
select ts, count(*) from logical_table_4 GROUP BY ts ORDER BY ts;
|
||||
|
||||
select * from logical_table_4;
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RepartitionExec:.*) RepartitionExec: REDACTED
|
||||
EXPLAIN select * from logical_table_4;
|
||||
|
||||
drop table logical_table_2;
|
||||
|
||||
drop table logical_table_3;
|
||||
|
||||
92
tests/cases/standalone/common/error/incorrect_sql.result
Normal file
@@ -0,0 +1,92 @@
|
||||
-- Migrated from DuckDB test: test/sql/error/incorrect_sql.test
|
||||
-- Typo in SELECT
|
||||
SELEC 42;
|
||||
|
||||
Error: 1001(Unsupported), SQL statement is not supported, keyword: SELEC
|
||||
|
||||
-- Unrecognized column
|
||||
SELECT x FROM (SELECT 1 as y);
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: No field named x. Valid fields are y.
|
||||
|
||||
-- Unrecognized function
|
||||
SELECT FUNFUNFUN();
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Invalid function 'funfunfun'.
|
||||
Did you mean 'range_fn'?
|
||||
|
||||
-- Wrong aggregate parameters
|
||||
SELECT SUM(42, 84, 11, 'hello');
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: Function 'sum' user-defined coercion failed with "Execution error: sum function requires 1 argument, got 4" No function matches the given name and argument types 'sum(Int64, Int64, Int64, Utf8)'. You might need to add explicit type casts.
|
||||
Candidate functions:
|
||||
sum(UserDefined)
|
||||
|
||||
-- No matching function signature
|
||||
SELECT cos(0, 1, 2, 3);
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Failed to coerce arguments to satisfy a call to 'cos' function: coercion from [Int64, Int64, Int64, Int64] to the signature Uniform(1, [Float64, Float32]) failed No function matches the given name and argument types 'cos(Int64, Int64, Int64, Int64)'. You might need to add explicit type casts.
|
||||
Candidate functions:
|
||||
cos(Float64/Float32)
|
||||
|
||||
-- Multiple WHERE clauses
|
||||
SELECT 42 WHERE 1=1 WHERE 1=0;
|
||||
|
||||
Error: 1001(Unsupported), SQL statement is not supported, keyword: WHERE
|
||||
|
||||
-- Multiple statements without semicolon
|
||||
SELECT 42
|
||||
SELECT 42;
|
||||
|
||||
Error: 1001(Unsupported), SQL statement is not supported, keyword: SELECT
|
||||
|
||||
-- Non-existent table
|
||||
SELECT * FROM integers2;
|
||||
|
||||
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.public.integers2
|
||||
|
||||
-- Non-existent schema
|
||||
SELECT * FROM bla.integers2;
|
||||
|
||||
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.bla.integers2
|
||||
|
||||
CREATE TABLE integers(integ INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE strings(str VARCHAR, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE chickens(feather INTEGER, beak INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Non-existent column
|
||||
SELECT feathe FROM chickens;
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: No field named feathe. Valid fields are chickens.feather, chickens.beak, chickens.ts.
|
||||
|
||||
-- Non-existent column with multiple tables
|
||||
SELECT feathe FROM chickens, integers, strings;
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: No field named feathe. Valid fields are chickens.feather, chickens.beak, chickens.ts, integers.integ, integers.ts, strings.str, strings.ts.
|
||||
|
||||
-- Ambiguous column reference
|
||||
SELECT ts FROM chickens, integers;
|
||||
|
||||
Error: 3000(PlanQuery), Failed to plan SQL: Ambiguous reference to unqualified field ts
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE chickens;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE strings;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
51
tests/cases/standalone/common/error/incorrect_sql.sql
Normal file
@@ -0,0 +1,51 @@
|
||||
-- Migrated from DuckDB test: test/sql/error/incorrect_sql.test
|
||||
|
||||
-- Typo in SELECT
|
||||
SELEC 42;
|
||||
|
||||
-- Unrecognized column
|
||||
SELECT x FROM (SELECT 1 as y);
|
||||
|
||||
-- Unrecognized function
|
||||
SELECT FUNFUNFUN();
|
||||
|
||||
-- Wrong aggregate parameters
|
||||
SELECT SUM(42, 84, 11, 'hello');
|
||||
|
||||
-- No matching function signature
|
||||
SELECT cos(0, 1, 2, 3);
|
||||
|
||||
-- Multiple WHERE clauses
|
||||
SELECT 42 WHERE 1=1 WHERE 1=0;
|
||||
|
||||
-- Multiple statements without semicolon
|
||||
SELECT 42
|
||||
SELECT 42;
|
||||
|
||||
-- Non-existent table
|
||||
SELECT * FROM integers2;
|
||||
|
||||
-- Non-existent schema
|
||||
SELECT * FROM bla.integers2;
|
||||
|
||||
CREATE TABLE integers(integ INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
CREATE TABLE strings(str VARCHAR, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
CREATE TABLE chickens(feather INTEGER, beak INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
-- Non-existent column
|
||||
SELECT feathe FROM chickens;
|
||||
|
||||
-- Non-existent column with multiple tables
|
||||
SELECT feathe FROM chickens, integers, strings;
|
||||
|
||||
-- Ambiguous column reference
|
||||
SELECT ts FROM chickens, integers;
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE chickens;
|
||||
|
||||
DROP TABLE strings;
|
||||
|
||||
DROP TABLE integers;
|
||||
125
tests/cases/standalone/common/filter/constant_comparisons.result
Normal file
@@ -0,0 +1,125 @@
|
||||
-- Migrated from DuckDB test: test/sql/filter/test_constant_comparisons.test
|
||||
-- Description: Test expressions with constant comparisons
|
||||
CREATE TABLE integers(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO integers VALUES (2, 12, 1000);
|
||||
|
||||
Affected Rows: 1
|
||||
|
||||
-- Test various constant comparisons
|
||||
SELECT * FROM integers WHERE 2=2;
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2=3;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
SELECT * FROM integers WHERE 2<>3;
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2<>2;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
SELECT * FROM integers WHERE 2>1;
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2>2;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
SELECT * FROM integers WHERE 2>=2;
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2>=3;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
SELECT * FROM integers WHERE 2<3;
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2<2;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
SELECT * FROM integers WHERE 2<=2;
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2<=1;
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
-- NULL comparisons
|
||||
SELECT a=NULL FROM integers;
|
||||
|
||||
+-------------------+
|
||||
| integers.a = NULL |
|
||||
+-------------------+
|
||||
| |
|
||||
+-------------------+
|
||||
|
||||
SELECT NULL=a FROM integers;
|
||||
|
||||
+-------------------+
|
||||
| NULL = integers.a |
|
||||
+-------------------+
|
||||
| |
|
||||
+-------------------+
|
||||
|
||||
-- IN clause with constants
|
||||
SELECT * FROM integers WHERE 2 IN (2, 3, 4, 5);
|
||||
|
||||
+---+----+---------------------+
|
||||
| a | b | ts |
|
||||
+---+----+---------------------+
|
||||
| 2 | 12 | 1970-01-01T00:00:01 |
|
||||
+---+----+---------------------+
|
||||
|
||||
SELECT * FROM integers WHERE 2 IN (1, 3, 4, 5);
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE integers;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
-- Migrated from DuckDB test: test/sql/filter/test_constant_comparisons.test
|
||||
-- Description: Test expressions with constant comparisons
|
||||
|
||||
CREATE TABLE integers(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
INSERT INTO integers VALUES (2, 12, 1000);
|
||||
|
||||
-- Test various constant comparisons
|
||||
SELECT * FROM integers WHERE 2=2;
|
||||
|
||||
SELECT * FROM integers WHERE 2=3;
|
||||
|
||||
SELECT * FROM integers WHERE 2<>3;
|
||||
|
||||
SELECT * FROM integers WHERE 2<>2;
|
||||
|
||||
SELECT * FROM integers WHERE 2>1;
|
||||
|
||||
SELECT * FROM integers WHERE 2>2;
|
||||
|
||||
SELECT * FROM integers WHERE 2>=2;
|
||||
|
||||
SELECT * FROM integers WHERE 2>=3;
|
||||
|
||||
SELECT * FROM integers WHERE 2<3;
|
||||
|
||||
SELECT * FROM integers WHERE 2<2;
|
||||
|
||||
SELECT * FROM integers WHERE 2<=2;
|
||||
|
||||
SELECT * FROM integers WHERE 2<=1;
|
||||
|
||||
-- NULL comparisons
|
||||
SELECT a=NULL FROM integers;
|
||||
|
||||
SELECT NULL=a FROM integers;
|
||||
|
||||
-- IN clause with constants
|
||||
SELECT * FROM integers WHERE 2 IN (2, 3, 4, 5);
|
||||
|
||||
SELECT * FROM integers WHERE 2 IN (1, 3, 4, 5);
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE integers;
|
||||
74
tests/cases/standalone/common/join/complex_join_expr.result
Normal file
@@ -0,0 +1,74 @@
|
||||
-- Migrated from DuckDB test: test/sql/join/test_complex_join_expr.test
|
||||
CREATE TABLE test (a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO test VALUES (4, 1, 1000), (2, 2, 2000);
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
CREATE TABLE test2 (b INTEGER, c INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO test2 VALUES (1, 2, 3000), (3, 0, 4000);
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
-- INNER JOIN with complex expression
|
||||
SELECT * FROM test JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a;
|
||||
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| a | b | ts | b | c | ts |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| 4 | 1 | 1970-01-01T00:00:01 | 3 | 0 | 1970-01-01T00:00:04 |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
|
||||
-- LEFT JOIN with complex expression
|
||||
SELECT * FROM test LEFT JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a;
|
||||
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| a | b | ts | b | c | ts |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| 2 | 2 | 1970-01-01T00:00:02 | | | |
|
||||
| 4 | 1 | 1970-01-01T00:00:01 | 3 | 0 | 1970-01-01T00:00:04 |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
|
||||
-- RIGHT JOIN with complex expression
|
||||
SELECT * FROM test RIGHT JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a NULLS FIRST;
|
||||
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| a | b | ts | b | c | ts |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| | | | 1 | 2 | 1970-01-01T00:00:03 |
|
||||
| 4 | 1 | 1970-01-01T00:00:01 | 3 | 0 | 1970-01-01T00:00:04 |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
|
||||
-- FULL JOIN with complex expression
|
||||
SELECT * FROM test FULL OUTER JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a NULLS FIRST;
|
||||
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| a | b | ts | b | c | ts |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| | | | 1 | 2 | 1970-01-01T00:00:03 |
|
||||
| 2 | 2 | 1970-01-01T00:00:02 | | | |
|
||||
| 4 | 1 | 1970-01-01T00:00:01 | 3 | 0 | 1970-01-01T00:00:04 |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
|
||||
-- Basic equi-join
|
||||
SELECT * FROM test JOIN test2 ON test.b = test2.b ORDER BY test.a;
|
||||
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| a | b | ts | b | c | ts |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
| 4 | 1 | 1970-01-01T00:00:01 | 1 | 2 | 1970-01-01T00:00:03 |
|
||||
+---+---+---------------------+---+---+---------------------+
|
||||
|
||||
DROP TABLE test2;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
27
tests/cases/standalone/common/join/complex_join_expr.sql
Normal file
@@ -0,0 +1,27 @@
|
||||
-- Migrated from DuckDB test: test/sql/join/test_complex_join_expr.test
|
||||
CREATE TABLE test (a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
INSERT INTO test VALUES (4, 1, 1000), (2, 2, 2000);
|
||||
|
||||
CREATE TABLE test2 (b INTEGER, c INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
INSERT INTO test2 VALUES (1, 2, 3000), (3, 0, 4000);
|
||||
|
||||
-- INNER JOIN with complex expression
|
||||
SELECT * FROM test JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a;
|
||||
|
||||
-- LEFT JOIN with complex expression
|
||||
SELECT * FROM test LEFT JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a;
|
||||
|
||||
-- RIGHT JOIN with complex expression
|
||||
SELECT * FROM test RIGHT JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a NULLS FIRST;
|
||||
|
||||
-- FULL JOIN with complex expression
|
||||
SELECT * FROM test FULL OUTER JOIN test2 ON test.a+test2.c=test.b+test2.b ORDER BY test.a NULLS FIRST;
|
||||
|
||||
-- Basic equi-join
|
||||
SELECT * FROM test JOIN test2 ON test.b = test2.b ORDER BY test.a;
|
||||
|
||||
DROP TABLE test2;
|
||||
|
||||
DROP TABLE test;
|
||||
162
tests/cases/standalone/common/join/cross_join_advanced.result
Normal file
@@ -0,0 +1,162 @@
|
||||
-- Migrated from DuckDB test: test/sql/join/cross_product/ advanced tests
|
||||
-- Tests advanced cross join scenarios
|
||||
CREATE TABLE products(prod_id INTEGER, prod_name VARCHAR, price DOUBLE, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE stores(store_id INTEGER, store_name VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE categories(cat_id INTEGER, cat_name VARCHAR, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO products VALUES
|
||||
(1, 'Laptop', 999.99, 1000), (2, 'Mouse', 29.99, 2000), (3, 'Monitor', 299.99, 3000);
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
INSERT INTO stores VALUES
|
||||
(1, 'TechStore', 'NYC', 1000), (2, 'GadgetShop', 'LA', 2000);
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
INSERT INTO categories VALUES
|
||||
(1, 'Electronics', 1000), (2, 'Accessories', 2000);
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
-- Basic cross join
|
||||
SELECT
|
||||
p.prod_name, s.store_name, s.city
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
ORDER BY p.prod_id, s.store_id;
|
||||
|
||||
+-----------+------------+------+
|
||||
| prod_name | store_name | city |
|
||||
+-----------+------------+------+
|
||||
| Laptop | TechStore | NYC |
|
||||
| Laptop | GadgetShop | LA |
|
||||
| Mouse | TechStore | NYC |
|
||||
| Mouse | GadgetShop | LA |
|
||||
| Monitor | TechStore | NYC |
|
||||
| Monitor | GadgetShop | LA |
|
||||
+-----------+------------+------+
|
||||
|
||||
-- Cross join with filtering
|
||||
SELECT
|
||||
p.prod_name, s.store_name, p.price
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
WHERE p.price > 100.00
|
||||
ORDER BY p.price DESC, s.store_name;
|
||||
|
||||
+-----------+------------+--------+
|
||||
| prod_name | store_name | price |
|
||||
+-----------+------------+--------+
|
||||
| Laptop | GadgetShop | 999.99 |
|
||||
| Laptop | TechStore | 999.99 |
|
||||
| Monitor | GadgetShop | 299.99 |
|
||||
| Monitor | TechStore | 299.99 |
|
||||
+-----------+------------+--------+
|
||||
|
||||
-- Triple cross join
|
||||
SELECT
|
||||
p.prod_name, s.store_name, c.cat_name,
|
||||
CASE WHEN p.price > 500 THEN 'Premium' ELSE 'Standard' END as tier
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
CROSS JOIN categories c
|
||||
ORDER BY p.prod_id, s.store_id, c.cat_id;
|
||||
|
||||
+-----------+------------+-------------+----------+
|
||||
| prod_name | store_name | cat_name | tier |
|
||||
+-----------+------------+-------------+----------+
|
||||
| Laptop | TechStore | Electronics | Premium |
|
||||
| Laptop | TechStore | Accessories | Premium |
|
||||
| Laptop | GadgetShop | Electronics | Premium |
|
||||
| Laptop | GadgetShop | Accessories | Premium |
|
||||
| Mouse | TechStore | Electronics | Standard |
|
||||
| Mouse | TechStore | Accessories | Standard |
|
||||
| Mouse | GadgetShop | Electronics | Standard |
|
||||
| Mouse | GadgetShop | Accessories | Standard |
|
||||
| Monitor | TechStore | Electronics | Standard |
|
||||
| Monitor | TechStore | Accessories | Standard |
|
||||
| Monitor | GadgetShop | Electronics | Standard |
|
||||
| Monitor | GadgetShop | Accessories | Standard |
|
||||
+-----------+------------+-------------+----------+
|
||||
|
||||
-- Cross join with aggregation
|
||||
SELECT
|
||||
s.city,
|
||||
COUNT(*) as product_store_combinations,
|
||||
AVG(p.price) as avg_price,
|
||||
SUM(p.price) as total_inventory_value
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
GROUP BY s.city
|
||||
ORDER BY s.city;
|
||||
|
||||
+------+----------------------------+-------------------+-----------------------+
|
||||
| city | product_store_combinations | avg_price | total_inventory_value |
|
||||
+------+----------------------------+-------------------+-----------------------+
|
||||
| LA | 3 | 443.3233333333333 | 1329.97 |
|
||||
| NYC | 3 | 443.3233333333333 | 1329.97 |
|
||||
+------+----------------------------+-------------------+-----------------------+
|
||||
|
||||
-- Cross join for inventory matrix
|
||||
SELECT
|
||||
p.prod_name,
|
||||
SUM(CASE WHEN s.city = 'NYC' THEN 1 ELSE 0 END) as nyc_availability,
|
||||
SUM(CASE WHEN s.city = 'LA' THEN 1 ELSE 0 END) as la_availability,
|
||||
COUNT(s.store_id) as total_store_availability
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
GROUP BY p.prod_name, p.prod_id
|
||||
ORDER BY p.prod_id;
|
||||
|
||||
+-----------+------------------+-----------------+--------------------------+
|
||||
| prod_name | nyc_availability | la_availability | total_store_availability |
|
||||
+-----------+------------------+-----------------+--------------------------+
|
||||
| Laptop | 1 | 1 | 2 |
|
||||
| Mouse | 1 | 1 | 2 |
|
||||
| Monitor | 1 | 1 | 2 |
|
||||
+-----------+------------------+-----------------+--------------------------+
|
||||
|
||||
-- Cross join with conditions and calculations
|
||||
SELECT
|
||||
p.prod_name,
|
||||
s.store_name,
|
||||
p.price,
|
||||
p.price * 0.1 as store_commission,
|
||||
p.price * 1.08 as price_with_tax
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
WHERE p.price BETWEEN 25.00 AND 1000.00
|
||||
ORDER BY p.price DESC, s.store_name;
|
||||
|
||||
+-----------+------------+--------+--------------------+--------------------+
|
||||
| prod_name | store_name | price | store_commission | price_with_tax |
|
||||
+-----------+------------+--------+--------------------+--------------------+
|
||||
| Laptop | GadgetShop | 999.99 | 99.99900000000001 | 1079.9892 |
|
||||
| Laptop | TechStore | 999.99 | 99.99900000000001 | 1079.9892 |
|
||||
| Monitor | GadgetShop | 299.99 | 29.999000000000002 | 323.98920000000004 |
|
||||
| Monitor | TechStore | 299.99 | 29.999000000000002 | 323.98920000000004 |
|
||||
| Mouse | GadgetShop | 29.99 | 2.999 | 32.3892 |
|
||||
| Mouse | TechStore | 29.99 | 2.999 | 32.3892 |
|
||||
+-----------+------------+--------+--------------------+--------------------+
|
||||
|
||||
DROP TABLE products;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE stores;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE categories;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
81
tests/cases/standalone/common/join/cross_join_advanced.sql
Normal file
@@ -0,0 +1,81 @@
|
||||
-- Migrated from DuckDB test: test/sql/join/cross_product/ advanced tests
|
||||
-- Tests advanced cross join scenarios
|
||||
|
||||
CREATE TABLE products(prod_id INTEGER, prod_name VARCHAR, price DOUBLE, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
CREATE TABLE stores(store_id INTEGER, store_name VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
CREATE TABLE categories(cat_id INTEGER, cat_name VARCHAR, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
INSERT INTO products VALUES
|
||||
(1, 'Laptop', 999.99, 1000), (2, 'Mouse', 29.99, 2000), (3, 'Monitor', 299.99, 3000);
|
||||
|
||||
INSERT INTO stores VALUES
|
||||
(1, 'TechStore', 'NYC', 1000), (2, 'GadgetShop', 'LA', 2000);
|
||||
|
||||
INSERT INTO categories VALUES
|
||||
(1, 'Electronics', 1000), (2, 'Accessories', 2000);
|
||||
|
||||
-- Basic cross join
|
||||
SELECT
|
||||
p.prod_name, s.store_name, s.city
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
ORDER BY p.prod_id, s.store_id;
|
||||
|
||||
-- Cross join with filtering
|
||||
SELECT
|
||||
p.prod_name, s.store_name, p.price
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
WHERE p.price > 100.00
|
||||
ORDER BY p.price DESC, s.store_name;
|
||||
|
||||
-- Triple cross join
|
||||
SELECT
|
||||
p.prod_name, s.store_name, c.cat_name,
|
||||
CASE WHEN p.price > 500 THEN 'Premium' ELSE 'Standard' END as tier
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
CROSS JOIN categories c
|
||||
ORDER BY p.prod_id, s.store_id, c.cat_id;
|
||||
|
||||
-- Cross join with aggregation
|
||||
SELECT
|
||||
s.city,
|
||||
COUNT(*) as product_store_combinations,
|
||||
AVG(p.price) as avg_price,
|
||||
SUM(p.price) as total_inventory_value
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
GROUP BY s.city
|
||||
ORDER BY s.city;
|
||||
|
||||
-- Cross join for inventory matrix
|
||||
SELECT
|
||||
p.prod_name,
|
||||
SUM(CASE WHEN s.city = 'NYC' THEN 1 ELSE 0 END) as nyc_availability,
|
||||
SUM(CASE WHEN s.city = 'LA' THEN 1 ELSE 0 END) as la_availability,
|
||||
COUNT(s.store_id) as total_store_availability
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
GROUP BY p.prod_name, p.prod_id
|
||||
ORDER BY p.prod_id;
|
||||
|
||||
-- Cross join with conditions and calculations
|
||||
SELECT
|
||||
p.prod_name,
|
||||
s.store_name,
|
||||
p.price,
|
||||
p.price * 0.1 as store_commission,
|
||||
p.price * 1.08 as price_with_tax
|
||||
FROM products p
|
||||
CROSS JOIN stores s
|
||||
WHERE p.price BETWEEN 25.00 AND 1000.00
|
||||
ORDER BY p.price DESC, s.store_name;
|
||||
|
||||
DROP TABLE products;
|
||||
|
||||
DROP TABLE stores;
|
||||
|
||||
DROP TABLE categories;
|
||||
63
tests/cases/standalone/common/join/cross_product.result
Normal file
@@ -0,0 +1,63 @@
|
||||
-- Migrated from DuckDB test: test/sql/join/cross_product/test_cross_product.test
|
||||
-- Tests CROSS JOIN functionality
|
||||
CREATE TABLE small_table (a INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE another_table (b INTEGER, ts TIMESTAMP TIME INDEX);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO small_table VALUES (1, 1000), (2, 2000);
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
INSERT INTO another_table VALUES (10, 3000), (20, 4000), (30, 5000);
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
-- Basic CROSS JOIN
|
||||
SELECT * FROM small_table CROSS JOIN another_table ORDER BY a, b;
|
||||
|
||||
+---+---------------------+----+---------------------+
|
||||
| a | ts | b | ts |
|
||||
+---+---------------------+----+---------------------+
|
||||
| 1 | 1970-01-01T00:00:01 | 10 | 1970-01-01T00:00:03 |
|
||||
| 1 | 1970-01-01T00:00:01 | 20 | 1970-01-01T00:00:04 |
|
||||
| 1 | 1970-01-01T00:00:01 | 30 | 1970-01-01T00:00:05 |
|
||||
| 2 | 1970-01-01T00:00:02 | 10 | 1970-01-01T00:00:03 |
|
||||
| 2 | 1970-01-01T00:00:02 | 20 | 1970-01-01T00:00:04 |
|
||||
| 2 | 1970-01-01T00:00:02 | 30 | 1970-01-01T00:00:05 |
|
||||
+---+---------------------+----+---------------------+
|
||||
|
||||
-- CROSS JOIN with WHERE filter
|
||||
SELECT * FROM small_table CROSS JOIN another_table WHERE a + b < 25 ORDER BY a, b;
|
||||
|
||||
+---+---------------------+----+---------------------+
|
||||
| a | ts | b | ts |
|
||||
+---+---------------------+----+---------------------+
|
||||
| 1 | 1970-01-01T00:00:01 | 10 | 1970-01-01T00:00:03 |
|
||||
| 1 | 1970-01-01T00:00:01 | 20 | 1970-01-01T00:00:04 |
|
||||
| 2 | 1970-01-01T00:00:02 | 10 | 1970-01-01T00:00:03 |
|
||||
| 2 | 1970-01-01T00:00:02 | 20 | 1970-01-01T00:00:04 |
|
||||
+---+---------------------+----+---------------------+
|
||||
|
||||
-- CROSS JOIN with aliases
|
||||
SELECT s.a, t.b FROM small_table s CROSS JOIN another_table t WHERE s.a = 1 ORDER BY b;
|
||||
|
||||
+---+----+
|
||||
| a | b |
|
||||
+---+----+
|
||||
| 1 | 10 |
|
||||
| 1 | 20 |
|
||||
| 1 | 30 |
|
||||
+---+----+
|
||||
|
||||
DROP TABLE another_table;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE small_table;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
23
tests/cases/standalone/common/join/cross_product.sql
Normal file
@@ -0,0 +1,23 @@
-- Migrated from DuckDB test: test/sql/join/cross_product/test_cross_product.test
-- Tests CROSS JOIN functionality

CREATE TABLE small_table (a INTEGER, ts TIMESTAMP TIME INDEX);

CREATE TABLE another_table (b INTEGER, ts TIMESTAMP TIME INDEX);

INSERT INTO small_table VALUES (1, 1000), (2, 2000);

INSERT INTO another_table VALUES (10, 3000), (20, 4000), (30, 5000);

-- Basic CROSS JOIN
SELECT * FROM small_table CROSS JOIN another_table ORDER BY a, b;

-- CROSS JOIN with WHERE filter
SELECT * FROM small_table CROSS JOIN another_table WHERE a + b < 25 ORDER BY a, b;

-- CROSS JOIN with aliases
SELECT s.a, t.b FROM small_table s CROSS JOIN another_table t WHERE s.a = 1 ORDER BY b;

DROP TABLE another_table;

DROP TABLE small_table;

61
tests/cases/standalone/common/join/full_outer_join.result
Normal file
@@ -0,0 +1,61 @@
-- Migrated from DuckDB test: test/sql/join/full_outer/test_full_outer_join.test
-- Tests FULL OUTER JOIN scenarios
CREATE TABLE left_full("id" INTEGER, "name" VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE right_full("id" INTEGER, "value" INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO left_full VALUES (1, 'Alice', 1000), (2, 'Bob', 2000), (3, 'Carol', 3000);

Affected Rows: 3

INSERT INTO right_full VALUES (2, 200, 4000), (3, 300, 5000), (4, 400, 6000);

Affected Rows: 3

-- Basic FULL OUTER JOIN
SELECT * FROM left_full l FULL OUTER JOIN right_full r ON l."id" = r."id" ORDER BY l."id" NULLS LAST, r."id" NULLS LAST;

+----+-------+---------------------+----+-------+---------------------+
| id | name | ts | id | value | ts |
+----+-------+---------------------+----+-------+---------------------+
| 1 | Alice | 1970-01-01T00:00:01 | | | |
| 2 | Bob | 1970-01-01T00:00:02 | 2 | 200 | 1970-01-01T00:00:04 |
| 3 | Carol | 1970-01-01T00:00:03 | 3 | 300 | 1970-01-01T00:00:05 |
| | | | 4 | 400 | 1970-01-01T00:00:06 |
+----+-------+---------------------+----+-------+---------------------+

-- FULL OUTER JOIN with WHERE on result
SELECT * FROM left_full l FULL OUTER JOIN right_full r ON l."id" = r."id" WHERE l."name" IS NULL OR r."value" IS NULL ORDER BY l."id" NULLS LAST, r."id" NULLS LAST;

+----+-------+---------------------+----+-------+---------------------+
| id | name | ts | id | value | ts |
+----+-------+---------------------+----+-------+---------------------+
| 1 | Alice | 1970-01-01T00:00:01 | | | |
| | | | 4 | 400 | 1970-01-01T00:00:06 |
+----+-------+---------------------+----+-------+---------------------+

-- FULL OUTER JOIN with complex conditions
SELECT * FROM left_full l FULL OUTER JOIN right_full r ON l."id" = r."id" AND r."value" > 250 ORDER BY l."id" NULLS LAST, r."id" NULLS LAST;

+----+-------+---------------------+----+-------+---------------------+
| id | name | ts | id | value | ts |
+----+-------+---------------------+----+-------+---------------------+
| 1 | Alice | 1970-01-01T00:00:01 | | | |
| 2 | Bob | 1970-01-01T00:00:02 | | | |
| 3 | Carol | 1970-01-01T00:00:03 | 3 | 300 | 1970-01-01T00:00:05 |
| | | | 2 | 200 | 1970-01-01T00:00:04 |
| | | | 4 | 400 | 1970-01-01T00:00:06 |
+----+-------+---------------------+----+-------+---------------------+

DROP TABLE right_full;

Affected Rows: 0

DROP TABLE left_full;

Affected Rows: 0

23
tests/cases/standalone/common/join/full_outer_join.sql
Normal file
@@ -0,0 +1,23 @@
-- Migrated from DuckDB test: test/sql/join/full_outer/test_full_outer_join.test
-- Tests FULL OUTER JOIN scenarios

CREATE TABLE left_full("id" INTEGER, "name" VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE right_full("id" INTEGER, "value" INTEGER, ts TIMESTAMP TIME INDEX);

INSERT INTO left_full VALUES (1, 'Alice', 1000), (2, 'Bob', 2000), (3, 'Carol', 3000);

INSERT INTO right_full VALUES (2, 200, 4000), (3, 300, 5000), (4, 400, 6000);

-- Basic FULL OUTER JOIN
SELECT * FROM left_full l FULL OUTER JOIN right_full r ON l."id" = r."id" ORDER BY l."id" NULLS LAST, r."id" NULLS LAST;

-- FULL OUTER JOIN with WHERE on result
SELECT * FROM left_full l FULL OUTER JOIN right_full r ON l."id" = r."id" WHERE l."name" IS NULL OR r."value" IS NULL ORDER BY l."id" NULLS LAST, r."id" NULLS LAST;

-- FULL OUTER JOIN with complex conditions
SELECT * FROM left_full l FULL OUTER JOIN right_full r ON l."id" = r."id" AND r."value" > 250 ORDER BY l."id" NULLS LAST, r."id" NULLS LAST;

DROP TABLE right_full;

DROP TABLE left_full;

140
tests/cases/standalone/common/join/hash_join_complex.result
Normal file
@@ -0,0 +1,140 @@
-- Migrated from DuckDB test: test/sql/join/ hash join tests
-- Tests complex hash join scenarios
CREATE TABLE large_table_a("id" INTEGER, value_a VARCHAR, num_a INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE large_table_b("id" INTEGER, value_b VARCHAR, num_b INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO large_table_a VALUES
(1, 'alpha', 100, 1000), (2, 'beta', 200, 2000), (3, 'gamma', 300, 3000),
(4, 'delta', 400, 4000), (5, 'epsilon', 500, 5000), (6, 'zeta', 600, 6000),
(7, 'eta', 700, 7000), (8, 'theta', 800, 8000), (9, 'iota', 900, 9000),
(10, 'kappa', 1000, 10000);

Affected Rows: 10

INSERT INTO large_table_b VALUES
(2, 'second', 20, 1000), (4, 'fourth', 40, 2000), (6, 'sixth', 60, 3000),
(8, 'eighth', 80, 4000), (10, 'tenth', 100, 5000), (12, 'twelfth', 120, 6000),
(14, 'fourteenth', 140, 7000), (16, 'sixteenth', 160, 8000);

Affected Rows: 8

-- Hash join with exact match
SELECT
a."id", a.value_a, a.num_a, b.value_b, b.num_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
ORDER BY a."id";

+----+---------+-------+---------+-------+
| id | value_a | num_a | value_b | num_b |
+----+---------+-------+---------+-------+
| 2 | beta | 200 | second | 20 |
| 4 | delta | 400 | fourth | 40 |
| 6 | zeta | 600 | sixth | 60 |
| 8 | theta | 800 | eighth | 80 |
| 10 | kappa | 1000 | tenth | 100 |
+----+---------+-------+---------+-------+

-- Hash join with multiple key conditions
SELECT
a."id", a.value_a, b.value_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id" AND a.num_a > b.num_b * 5
ORDER BY a."id";

+----+---------+---------+
| id | value_a | value_b |
+----+---------+---------+
| 2 | beta | second |
| 4 | delta | fourth |
| 6 | zeta | sixth |
| 8 | theta | eighth |
| 10 | kappa | tenth |
+----+---------+---------+

-- Hash join with aggregation on both sides
SELECT
joined_data."id",
joined_data.combined_num,
joined_data.value_concat
FROM (
SELECT
a."id",
a.num_a + b.num_b as combined_num,
a.value_a || '-' || b.value_b as value_concat
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
) joined_data
WHERE joined_data.combined_num > 500
ORDER BY joined_data.combined_num DESC;

+----+--------------+--------------+
| id | combined_num | value_concat |
+----+--------------+--------------+
| 10 | 1100 | kappa-tenth |
| 8 | 880 | theta-eighth |
| 6 | 660 | zeta-sixth |
+----+--------------+--------------+

-- Hash join with filtering on both tables
SELECT
a.value_a, b.value_b, a.num_a, b.num_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
WHERE a.num_a > 500 AND b.num_b < 100
ORDER BY a.num_a DESC;

+---------+---------+-------+-------+
| value_a | value_b | num_a | num_b |
+---------+---------+-------+-------+
| theta | eighth | 800 | 80 |
| zeta | sixth | 600 | 60 |
+---------+---------+-------+-------+

-- Hash join for set operations
SELECT
a."id",
'Both Tables' as source,
a.value_a as value_from_a,
b.value_b as value_from_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
UNION ALL
SELECT
a."id",
'Only Table A' as source,
a.value_a,
NULL as value_from_b
FROM large_table_a a
LEFT JOIN large_table_b b ON a."id" = b."id"
WHERE b."id" IS NULL
ORDER BY "id", source;

+----+--------------+--------------+--------------+
| id | source | value_from_a | value_from_b |
+----+--------------+--------------+--------------+
| 1 | Only Table A | alpha | |
| 2 | Both Tables | beta | second |
| 3 | Only Table A | gamma | |
| 4 | Both Tables | delta | fourth |
| 5 | Only Table A | epsilon | |
| 6 | Both Tables | zeta | sixth |
| 7 | Only Table A | eta | |
| 8 | Both Tables | theta | eighth |
| 9 | Only Table A | iota | |
| 10 | Both Tables | kappa | tenth |
+----+--------------+--------------+--------------+

DROP TABLE large_table_a;

Affected Rows: 0

DROP TABLE large_table_b;

Affected Rows: 0

78
tests/cases/standalone/common/join/hash_join_complex.sql
Normal file
@@ -0,0 +1,78 @@
-- Migrated from DuckDB test: test/sql/join/ hash join tests
-- Tests complex hash join scenarios

CREATE TABLE large_table_a("id" INTEGER, value_a VARCHAR, num_a INTEGER, ts TIMESTAMP TIME INDEX);

CREATE TABLE large_table_b("id" INTEGER, value_b VARCHAR, num_b INTEGER, ts TIMESTAMP TIME INDEX);

INSERT INTO large_table_a VALUES
(1, 'alpha', 100, 1000), (2, 'beta', 200, 2000), (3, 'gamma', 300, 3000),
(4, 'delta', 400, 4000), (5, 'epsilon', 500, 5000), (6, 'zeta', 600, 6000),
(7, 'eta', 700, 7000), (8, 'theta', 800, 8000), (9, 'iota', 900, 9000),
(10, 'kappa', 1000, 10000);

INSERT INTO large_table_b VALUES
(2, 'second', 20, 1000), (4, 'fourth', 40, 2000), (6, 'sixth', 60, 3000),
(8, 'eighth', 80, 4000), (10, 'tenth', 100, 5000), (12, 'twelfth', 120, 6000),
(14, 'fourteenth', 140, 7000), (16, 'sixteenth', 160, 8000);

-- Hash join with exact match
SELECT
a."id", a.value_a, a.num_a, b.value_b, b.num_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
ORDER BY a."id";

-- Hash join with multiple key conditions
SELECT
a."id", a.value_a, b.value_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id" AND a.num_a > b.num_b * 5
ORDER BY a."id";

-- Hash join with aggregation on both sides
SELECT
joined_data."id",
joined_data.combined_num,
joined_data.value_concat
FROM (
SELECT
a."id",
a.num_a + b.num_b as combined_num,
a.value_a || '-' || b.value_b as value_concat
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
) joined_data
WHERE joined_data.combined_num > 500
ORDER BY joined_data.combined_num DESC;

-- Hash join with filtering on both tables
SELECT
a.value_a, b.value_b, a.num_a, b.num_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
WHERE a.num_a > 500 AND b.num_b < 100
ORDER BY a.num_a DESC;

-- Hash join for set operations
SELECT
a."id",
'Both Tables' as source,
a.value_a as value_from_a,
b.value_b as value_from_b
FROM large_table_a a
INNER JOIN large_table_b b ON a."id" = b."id"
UNION ALL
SELECT
a."id",
'Only Table A' as source,
a.value_a,
NULL as value_from_b
FROM large_table_a a
LEFT JOIN large_table_b b ON a."id" = b."id"
WHERE b."id" IS NULL
ORDER BY "id", source;

DROP TABLE large_table_a;

DROP TABLE large_table_b;

79
tests/cases/standalone/common/join/inequality_join.result
Normal file
@@ -0,0 +1,79 @@
-- Migrated from DuckDB test: test/sql/join/inner/test_range_join.test
-- Tests inequality JOIN conditions
CREATE TABLE events("id" INTEGER, event_time INTEGER, duration INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE time_ranges(start_time INTEGER, end_time INTEGER, range_name VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO events VALUES (1, 10, 5, 1000), (2, 25, 3, 2000), (3, 45, 8, 3000);

Affected Rows: 3

INSERT INTO time_ranges VALUES (0, 20, 'Early', 4000), (20, 40, 'Mid', 5000), (40, 60, 'Late', 6000);

Affected Rows: 3

-- Range join using BETWEEN
SELECT e."id", e.event_time, t.range_name
FROM events e JOIN time_ranges t ON e.event_time BETWEEN t.start_time AND t.end_time
ORDER BY e."id";

+----+------------+------------+
| id | event_time | range_name |
+----+------------+------------+
| 1 | 10 | Early |
| 2 | 25 | Mid |
| 3 | 45 | Late |
+----+------------+------------+

-- Inequality join conditions
SELECT e."id", e.event_time, e.duration, t.range_name
FROM events e JOIN time_ranges t ON e.event_time >= t.start_time AND e.event_time < t.end_time
ORDER BY e."id";

+----+------------+----------+------------+
| id | event_time | duration | range_name |
+----+------------+----------+------------+
| 1 | 10 | 5 | Early |
| 2 | 25 | 3 | Mid |
| 3 | 45 | 8 | Late |
+----+------------+----------+------------+

-- Join with overlap condition
SELECT e."id", t.range_name
FROM events e JOIN time_ranges t ON
e.event_time < t.end_time AND (e.event_time + e.duration) > t.start_time
ORDER BY e."id", t.start_time;

+----+------------+
| id | range_name |
+----+------------+
| 1 | Early |
| 2 | Mid |
| 3 | Late |
+----+------------+

-- Self join with inequality
SELECT e1."id" as id1, e2."id" as id2, e1.event_time, e2.event_time
FROM events e1 JOIN events e2 ON e1.event_time < e2.event_time
ORDER BY e1."id", e2."id";

+-----+-----+------------+------------+
| id1 | id2 | event_time | event_time |
+-----+-----+------------+------------+
| 1 | 2 | 10 | 25 |
| 1 | 3 | 10 | 45 |
| 2 | 3 | 25 | 45 |
+-----+-----+------------+------------+

DROP TABLE time_ranges;

Affected Rows: 0

DROP TABLE events;

Affected Rows: 0

35
tests/cases/standalone/common/join/inequality_join.sql
Normal file
@@ -0,0 +1,35 @@
-- Migrated from DuckDB test: test/sql/join/inner/test_range_join.test
-- Tests inequality JOIN conditions

CREATE TABLE events("id" INTEGER, event_time INTEGER, duration INTEGER, ts TIMESTAMP TIME INDEX);

CREATE TABLE time_ranges(start_time INTEGER, end_time INTEGER, range_name VARCHAR, ts TIMESTAMP TIME INDEX);

INSERT INTO events VALUES (1, 10, 5, 1000), (2, 25, 3, 2000), (3, 45, 8, 3000);

INSERT INTO time_ranges VALUES (0, 20, 'Early', 4000), (20, 40, 'Mid', 5000), (40, 60, 'Late', 6000);

-- Range join using BETWEEN
SELECT e."id", e.event_time, t.range_name
FROM events e JOIN time_ranges t ON e.event_time BETWEEN t.start_time AND t.end_time
ORDER BY e."id";

-- Inequality join conditions
SELECT e."id", e.event_time, e.duration, t.range_name
FROM events e JOIN time_ranges t ON e.event_time >= t.start_time AND e.event_time < t.end_time
ORDER BY e."id";

-- Join with overlap condition
SELECT e."id", t.range_name
FROM events e JOIN time_ranges t ON
e.event_time < t.end_time AND (e.event_time + e.duration) > t.start_time
ORDER BY e."id", t.start_time;

-- Self join with inequality
SELECT e1."id" as id1, e2."id" as id2, e1.event_time, e2.event_time
FROM events e1 JOIN events e2 ON e1.event_time < e2.event_time
ORDER BY e1."id", e2."id";

DROP TABLE time_ranges;

DROP TABLE events;

167
tests/cases/standalone/common/join/inequality_joins.result
Normal file
@@ -0,0 +1,167 @@
-- Migrated from DuckDB test: test/sql/join/iejoin/ inequality join tests
-- Tests inequality join conditions
CREATE TABLE time_events(event_id INTEGER, event_time TIMESTAMP, event_type VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE time_windows(window_id INTEGER, start_time TIMESTAMP, end_time TIMESTAMP, window_name VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO time_events VALUES
(1, '2023-01-01 10:15:00', 'login', 1000),
(2, '2023-01-01 10:30:00', 'purchase', 2000),
(3, '2023-01-01 10:45:00', 'logout', 3000),
(4, '2023-01-01 11:05:00', 'login', 4000),
(5, '2023-01-01 11:20:00', 'view', 5000),
(6, '2023-01-01 11:35:00', 'purchase', 6000);

Affected Rows: 6

INSERT INTO time_windows VALUES
(1, '2023-01-01 10:00:00', '2023-01-01 10:30:00', 'Morning Early', 1000),
(2, '2023-01-01 10:30:00', '2023-01-01 11:00:00', 'Morning Late', 2000),
(3, '2023-01-01 11:00:00', '2023-01-01 11:30:00', 'Noon Early', 3000),
(4, '2023-01-01 11:30:00', '2023-01-01 12:00:00', 'Noon Late', 4000);

Affected Rows: 4

-- Range join: events within time windows
SELECT
e.event_id, e.event_time, e.event_type, w.window_name
FROM time_events e
INNER JOIN time_windows w
ON e.event_time >= w.start_time AND e.event_time < w.end_time
ORDER BY e.event_time;

+----------+---------------------+------------+---------------+
| event_id | event_time | event_type | window_name |
+----------+---------------------+------------+---------------+
| 1 | 2023-01-01T10:15:00 | login | Morning Early |
| 2 | 2023-01-01T10:30:00 | purchase | Morning Late |
| 3 | 2023-01-01T10:45:00 | logout | Morning Late |
| 4 | 2023-01-01T11:05:00 | login | Noon Early |
| 5 | 2023-01-01T11:20:00 | view | Noon Early |
| 6 | 2023-01-01T11:35:00 | purchase | Noon Late |
+----------+---------------------+------------+---------------+

-- Inequality join with additional conditions
SELECT
e.event_id, e.event_type, w.window_name
FROM time_events e
INNER JOIN time_windows w
ON e.event_time >= w.start_time
AND e.event_time < w.end_time
AND e.event_type = 'purchase'
ORDER BY e.event_time;

+----------+------------+--------------+
| event_id | event_type | window_name |
+----------+------------+--------------+
| 2 | purchase | Morning Late |
| 6 | purchase | Noon Late |
+----------+------------+--------------+

-- Cross-time analysis with inequality joins
SELECT
e1.event_id as first_event, e2.event_id as second_event,
e1.event_type as first_type, e2.event_type as second_type,
e2.event_time - e1.event_time as time_diff
FROM time_events e1
INNER JOIN time_events e2
ON e1.event_time < e2.event_time
AND e2.event_time - e1.event_time <= INTERVAL '30 minutes'
ORDER BY e1.event_time, e2.event_time;

+-------------+--------------+------------+-------------+-----------+
| first_event | second_event | first_type | second_type | time_diff |
+-------------+--------------+------------+-------------+-----------+
| 1 | 2 | login | purchase | PT900S |
| 1 | 3 | login | logout | PT1800S |
| 2 | 3 | purchase | logout | PT900S |
| 3 | 4 | logout | login | PT1200S |
| 4 | 5 | login | view | PT900S |
| 4 | 6 | login | purchase | PT1800S |
| 5 | 6 | view | purchase | PT900S |
+-------------+--------------+------------+-------------+-----------+

CREATE TABLE price_history(item_id INTEGER, price DOUBLE, effective_date DATE, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE orders_ineq(order_id INTEGER, item_id INTEGER, order_date DATE, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO price_history VALUES
(1, 100.00, '2023-01-01', 1000), (1, 110.00, '2023-01-15', 2000),
(2, 50.00, '2023-01-01', 3000), (2, 55.00, '2023-01-20', 4000);

Affected Rows: 4

INSERT INTO orders_ineq VALUES
(1, 1, '2023-01-10', 1000), (2, 1, '2023-01-20', 2000),
(3, 2, '2023-01-05', 3000), (4, 2, '2023-01-25', 4000);

Affected Rows: 4

-- Historical price lookup with inequality join
SELECT
o.order_id, o.order_date, p.price, p.effective_date
FROM orders_ineq o
INNER JOIN price_history p
ON o.item_id = p.item_id
AND o.order_date >= p.effective_date
ORDER BY o.order_id;

+----------+------------+-------+----------------+
| order_id | order_date | price | effective_date |
+----------+------------+-------+----------------+
| 1 | 2023-01-10 | 100.0 | 2023-01-01 |
| 2 | 2023-01-20 | 100.0 | 2023-01-01 |
| 2 | 2023-01-20 | 110.0 | 2023-01-15 |
| 3 | 2023-01-05 | 50.0 | 2023-01-01 |
| 4 | 2023-01-25 | 50.0 | 2023-01-01 |
| 4 | 2023-01-25 | 55.0 | 2023-01-20 |
+----------+------------+-------+----------------+

-- Latest price before order date
SELECT
o.order_id, o.order_date, latest_price.price
FROM orders_ineq o
INNER JOIN (
SELECT
item_id,
price,
effective_date,
ROW_NUMBER() OVER (PARTITION BY item_id ORDER BY effective_date DESC) as rn
FROM price_history
) latest_price
ON o.item_id = latest_price.item_id
AND o.order_date >= latest_price.effective_date
AND latest_price.rn = 1
ORDER BY o.order_id;

+----------+------------+-------+
| order_id | order_date | price |
+----------+------------+-------+
| 2 | 2023-01-20 | 110.0 |
| 4 | 2023-01-25 | 55.0 |
+----------+------------+-------+

DROP TABLE time_events;

Affected Rows: 0

DROP TABLE time_windows;

Affected Rows: 0

DROP TABLE price_history;

Affected Rows: 0

DROP TABLE orders_ineq;

Affected Rows: 0

94
tests/cases/standalone/common/join/inequality_joins.sql
Normal file
@@ -0,0 +1,94 @@
-- Migrated from DuckDB test: test/sql/join/iejoin/ inequality join tests
-- Tests inequality join conditions

CREATE TABLE time_events(event_id INTEGER, event_time TIMESTAMP, event_type VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE time_windows(window_id INTEGER, start_time TIMESTAMP, end_time TIMESTAMP, window_name VARCHAR, ts TIMESTAMP TIME INDEX);

INSERT INTO time_events VALUES
(1, '2023-01-01 10:15:00', 'login', 1000),
(2, '2023-01-01 10:30:00', 'purchase', 2000),
(3, '2023-01-01 10:45:00', 'logout', 3000),
(4, '2023-01-01 11:05:00', 'login', 4000),
(5, '2023-01-01 11:20:00', 'view', 5000),
(6, '2023-01-01 11:35:00', 'purchase', 6000);

INSERT INTO time_windows VALUES
(1, '2023-01-01 10:00:00', '2023-01-01 10:30:00', 'Morning Early', 1000),
(2, '2023-01-01 10:30:00', '2023-01-01 11:00:00', 'Morning Late', 2000),
(3, '2023-01-01 11:00:00', '2023-01-01 11:30:00', 'Noon Early', 3000),
(4, '2023-01-01 11:30:00', '2023-01-01 12:00:00', 'Noon Late', 4000);

-- Range join: events within time windows
SELECT
e.event_id, e.event_time, e.event_type, w.window_name
FROM time_events e
INNER JOIN time_windows w
ON e.event_time >= w.start_time AND e.event_time < w.end_time
ORDER BY e.event_time;

-- Inequality join with additional conditions
SELECT
e.event_id, e.event_type, w.window_name
FROM time_events e
INNER JOIN time_windows w
ON e.event_time >= w.start_time
AND e.event_time < w.end_time
AND e.event_type = 'purchase'
ORDER BY e.event_time;

-- Cross-time analysis with inequality joins
SELECT
e1.event_id as first_event, e2.event_id as second_event,
e1.event_type as first_type, e2.event_type as second_type,
e2.event_time - e1.event_time as time_diff
FROM time_events e1
INNER JOIN time_events e2
ON e1.event_time < e2.event_time
AND e2.event_time - e1.event_time <= INTERVAL '30 minutes'
ORDER BY e1.event_time, e2.event_time;

CREATE TABLE price_history(item_id INTEGER, price DOUBLE, effective_date DATE, ts TIMESTAMP TIME INDEX);
CREATE TABLE orders_ineq(order_id INTEGER, item_id INTEGER, order_date DATE, ts TIMESTAMP TIME INDEX);

INSERT INTO price_history VALUES
(1, 100.00, '2023-01-01', 1000), (1, 110.00, '2023-01-15', 2000),
(2, 50.00, '2023-01-01', 3000), (2, 55.00, '2023-01-20', 4000);

INSERT INTO orders_ineq VALUES
(1, 1, '2023-01-10', 1000), (2, 1, '2023-01-20', 2000),
(3, 2, '2023-01-05', 3000), (4, 2, '2023-01-25', 4000);

-- Historical price lookup with inequality join
SELECT
o.order_id, o.order_date, p.price, p.effective_date
FROM orders_ineq o
INNER JOIN price_history p
ON o.item_id = p.item_id
AND o.order_date >= p.effective_date
ORDER BY o.order_id;

-- Latest price before order date
SELECT
o.order_id, o.order_date, latest_price.price
FROM orders_ineq o
INNER JOIN (
SELECT
item_id,
price,
effective_date,
ROW_NUMBER() OVER (PARTITION BY item_id ORDER BY effective_date DESC) as rn
FROM price_history
) latest_price
ON o.item_id = latest_price.item_id
AND o.order_date >= latest_price.effective_date
AND latest_price.rn = 1
ORDER BY o.order_id;

DROP TABLE time_events;

DROP TABLE time_windows;

DROP TABLE price_history;

DROP TABLE orders_ineq;

167
tests/cases/standalone/common/join/inner_join_advanced.result
Normal file
@@ -0,0 +1,167 @@
-- Migrated from DuckDB test: test/sql/join/inner/ advanced tests
-- Tests advanced inner join patterns
CREATE TABLE customers(cust_id INTEGER, cust_name VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE orders(order_id INTEGER, cust_id INTEGER, order_date DATE, amount DOUBLE, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE order_items(item_id INTEGER, order_id INTEGER, product VARCHAR, quantity INTEGER, price DOUBLE, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO customers VALUES
(1, 'John', 'NYC', 1000), (2, 'Jane', 'LA', 2000), (3, 'Bob', 'Chicago', 3000), (4, 'Alice', 'NYC', 4000);

Affected Rows: 4

INSERT INTO orders VALUES
(101, 1, '2023-01-01', 250.00, 1000), (102, 2, '2023-01-02', 180.00, 2000),
(103, 1, '2023-01-03', 420.00, 3000), (104, 3, '2023-01-04', 95.00, 4000),
(105, 4, '2023-01-05', 310.00, 5000);

Affected Rows: 5

INSERT INTO order_items VALUES
(1, 101, 'Widget', 2, 125.00, 1000), (2, 101, 'Gadget', 1, 0.00, 2000),
(3, 102, 'Tool', 3, 60.00, 3000), (4, 103, 'Device', 1, 420.00, 4000),
(5, 104, 'Part', 5, 19.00, 5000), (6, 105, 'Component', 2, 155.00, 6000);

Affected Rows: 6

-- Multi-table inner join
SELECT
c.cust_name, c.city, o.order_id, o.order_date, o.amount
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
ORDER BY o.order_date, c.cust_name;

+-----------+---------+----------+------------+--------+
| cust_name | city | order_id | order_date | amount |
+-----------+---------+----------+------------+--------+
| John | NYC | 101 | 2023-01-01 | 250.0 |
| Jane | LA | 102 | 2023-01-02 | 180.0 |
| John | NYC | 103 | 2023-01-03 | 420.0 |
| Bob | Chicago | 104 | 2023-01-04 | 95.0 |
| Alice | NYC | 105 | 2023-01-05 | 310.0 |
+-----------+---------+----------+------------+--------+

-- Three-way inner join
SELECT
c.cust_name, o.order_id, oi.product, oi.quantity, oi.price
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
ORDER BY c.cust_name, o.order_id, oi.product;

+-----------+----------+-----------+----------+-------+
| cust_name | order_id | product | quantity | price |
+-----------+----------+-----------+----------+-------+
| Alice | 105 | Component | 2 | 155.0 |
| Bob | 104 | Part | 5 | 19.0 |
| Jane | 102 | Tool | 3 | 60.0 |
| John | 101 | Gadget | 1 | 0.0 |
| John | 101 | Widget | 2 | 125.0 |
| John | 103 | Device | 1 | 420.0 |
+-----------+----------+-----------+----------+-------+

-- Inner join with complex conditions
SELECT
c.cust_name, o.order_id, o.amount
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id AND o.amount > 200.00
ORDER BY o.amount DESC;

+-----------+----------+--------+
| cust_name | order_id | amount |
+-----------+----------+--------+
| John | 103 | 420.0 |
| Alice | 105 | 310.0 |
| John | 101 | 250.0 |
+-----------+----------+--------+

-- Inner join with aggregation
SELECT
c.city,
COUNT(o.order_id) as total_orders,
SUM(o.amount) as total_amount,
AVG(o.amount) as avg_order_amount
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
GROUP BY c.city
ORDER BY total_amount DESC;

+---------+--------------+--------------+-------------------+
| city | total_orders | total_amount | avg_order_amount |
+---------+--------------+--------------+-------------------+
| NYC | 3 | 980.0 | 326.6666666666667 |
| LA | 1 | 180.0 | 180.0 |
| Chicago | 1 | 95.0 | 95.0 |
+---------+--------------+--------------+-------------------+

-- Self join
SELECT
o1.order_id as order1, o2.order_id as order2, o1.amount, o2.amount
FROM orders o1
INNER JOIN orders o2 ON o1.cust_id = o2.cust_id AND o1.order_id < o2.order_id
ORDER BY o1.order_id, o2.order_id;

+--------+--------+--------+--------+
| order1 | order2 | amount | amount |
+--------+--------+--------+--------+
| 101 | 103 | 250.0 | 420.0 |
+--------+--------+--------+--------+

-- Join with subquery
SELECT
c.cust_name, high_orders.total_amount
FROM customers c
INNER JOIN (
SELECT cust_id, SUM(amount) as total_amount
FROM orders
GROUP BY cust_id
HAVING SUM(amount) > 300
) high_orders ON c.cust_id = high_orders.cust_id
ORDER BY high_orders.total_amount DESC;

+-----------+--------------+
| cust_name | total_amount |
+-----------+--------------+
| John | 670.0 |
| Alice | 310.0 |
+-----------+--------------+

-- Join with window functions
SELECT
c.cust_name,
o.order_id,
o.amount,
ROW_NUMBER() OVER (PARTITION BY c.cust_id ORDER BY o.order_date) as order_sequence
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
ORDER BY c.cust_name, order_sequence;

+-----------+----------+--------+----------------+
| cust_name | order_id | amount | order_sequence |
+-----------+----------+--------+----------------+
| Alice | 105 | 310.0 | 1 |
| Bob | 104 | 95.0 | 1 |
| Jane | 102 | 180.0 | 1 |
| John | 101 | 250.0 | 1 |
| John | 103 | 420.0 | 2 |
+-----------+----------+--------+----------------+

DROP TABLE customers;

Affected Rows: 0

DROP TABLE orders;

Affected Rows: 0

DROP TABLE order_items;

Affected Rows: 0

87
tests/cases/standalone/common/join/inner_join_advanced.sql
Normal file
@@ -0,0 +1,87 @@
-- Migrated from DuckDB test: test/sql/join/inner/ advanced tests
-- Tests advanced inner join patterns

CREATE TABLE customers(cust_id INTEGER, cust_name VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX);
CREATE TABLE orders(order_id INTEGER, cust_id INTEGER, order_date DATE, amount DOUBLE, ts TIMESTAMP TIME INDEX);
CREATE TABLE order_items(item_id INTEGER, order_id INTEGER, product VARCHAR, quantity INTEGER, price DOUBLE, ts TIMESTAMP TIME INDEX);

INSERT INTO customers VALUES
(1, 'John', 'NYC', 1000), (2, 'Jane', 'LA', 2000), (3, 'Bob', 'Chicago', 3000), (4, 'Alice', 'NYC', 4000);

INSERT INTO orders VALUES
(101, 1, '2023-01-01', 250.00, 1000), (102, 2, '2023-01-02', 180.00, 2000),
(103, 1, '2023-01-03', 420.00, 3000), (104, 3, '2023-01-04', 95.00, 4000),
(105, 4, '2023-01-05', 310.00, 5000);

INSERT INTO order_items VALUES
(1, 101, 'Widget', 2, 125.00, 1000), (2, 101, 'Gadget', 1, 0.00, 2000),
(3, 102, 'Tool', 3, 60.00, 3000), (4, 103, 'Device', 1, 420.00, 4000),
(5, 104, 'Part', 5, 19.00, 5000), (6, 105, 'Component', 2, 155.00, 6000);

-- Multi-table inner join
SELECT
c.cust_name, c.city, o.order_id, o.order_date, o.amount
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
ORDER BY o.order_date, c.cust_name;

-- Three-way inner join
SELECT
c.cust_name, o.order_id, oi.product, oi.quantity, oi.price
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
ORDER BY c.cust_name, o.order_id, oi.product;

-- Inner join with complex conditions
SELECT
c.cust_name, o.order_id, o.amount
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id AND o.amount > 200.00
ORDER BY o.amount DESC;

-- Inner join with aggregation
SELECT
c.city,
COUNT(o.order_id) as total_orders,
SUM(o.amount) as total_amount,
AVG(o.amount) as avg_order_amount
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
GROUP BY c.city
ORDER BY total_amount DESC;

-- Self join
SELECT
o1.order_id as order1, o2.order_id as order2, o1.amount, o2.amount
FROM orders o1
INNER JOIN orders o2 ON o1.cust_id = o2.cust_id AND o1.order_id < o2.order_id
ORDER BY o1.order_id, o2.order_id;

-- Join with subquery
SELECT
c.cust_name, high_orders.total_amount
FROM customers c
INNER JOIN (
SELECT cust_id, SUM(amount) as total_amount
FROM orders
GROUP BY cust_id
HAVING SUM(amount) > 300
) high_orders ON c.cust_id = high_orders.cust_id
ORDER BY high_orders.total_amount DESC;

-- Join with window functions
SELECT
c.cust_name,
o.order_id,
o.amount,
ROW_NUMBER() OVER (PARTITION BY c.cust_id ORDER BY o.order_date) as order_sequence
FROM customers c
INNER JOIN orders o ON c.cust_id = o.cust_id
ORDER BY c.cust_name, order_sequence;

DROP TABLE customers;

DROP TABLE orders;

DROP TABLE order_items;

@@ -0,0 +1,162 @@
-- Migrated from DuckDB test: test/sql/join/ complex condition tests
-- Tests complex join conditions and predicates
CREATE TABLE sales_reps(rep_id INTEGER, "name" VARCHAR, region VARCHAR, quota INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE customer_accounts(account_id INTEGER, account_name VARCHAR, region VARCHAR, rep_id INTEGER, revenue INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO sales_reps VALUES
(1, 'Tom', 'North', 100000, 1000), (2, 'Sarah', 'South', 150000, 2000),
(3, 'Mike', 'East', 120000, 3000), (4, 'Lisa', 'West', 180000, 4000);

Affected Rows: 4

INSERT INTO customer_accounts VALUES
(101, 'TechCorp', 'North', 1, 85000, 1000), (102, 'DataInc', 'South', 2, 195000, 2000),
(103, 'CloudSys', 'North', 1, 110000, 3000), (104, 'NetSoft', 'East', 3, 75000, 4000),
(105, 'WebCo', 'West', 4, 225000, 5000), (106, 'AppDev', 'South', 2, 140000, 6000);

Affected Rows: 6

-- Join with multiple conditions
SELECT
sr."name" as rep_name, ca.account_name, ca.revenue
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id AND sr.region = ca.region
ORDER BY sr.rep_id, ca.revenue DESC;

+----------+--------------+---------+
| rep_name | account_name | revenue |
+----------+--------------+---------+
| Tom | CloudSys | 110000 |
| Tom | TechCorp | 85000 |
| Sarah | DataInc | 195000 |
| Sarah | AppDev | 140000 |
| Mike | NetSoft | 75000 |
| Lisa | WebCo | 225000 |
+----------+--------------+---------+

-- Join with inequality conditions
SELECT
sr."name", sr.quota, ca.account_name, ca.revenue
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id AND ca.revenue < sr.quota
ORDER BY sr.rep_id, ca.revenue;

+-------+--------+--------------+---------+
| name | quota | account_name | revenue |
+-------+--------+--------------+---------+
| Tom | 100000 | TechCorp | 85000 |
| Sarah | 150000 | AppDev | 140000 |
| Mike | 120000 | NetSoft | 75000 |
+-------+--------+--------------+---------+

-- Join with range conditions
SELECT
sr."name", ca.account_name, ca.revenue, sr.quota
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id
AND ca.revenue BETWEEN sr.quota * 0.5 AND sr.quota * 1.5
ORDER BY sr.rep_id, ca.revenue;

+-------+--------------+---------+--------+
| name | account_name | revenue | quota |
+-------+--------------+---------+--------+
| Tom | TechCorp | 85000 | 100000 |
| Tom | CloudSys | 110000 | 100000 |
| Sarah | AppDev | 140000 | 150000 |
| Sarah | DataInc | 195000 | 150000 |
| Mike | NetSoft | 75000 | 120000 |
| Lisa | WebCo | 225000 | 180000 |
+-------+--------------+---------+--------+

-- Join with CASE in conditions
SELECT
sr."name", ca.account_name, ca.revenue,
CASE WHEN ca.revenue >= sr.quota THEN 'Met Quota' ELSE 'Below Quota' END as performance
FROM sales_reps sr
INNER JOIN customer_accounts ca ON sr.rep_id = ca.rep_id
ORDER BY sr.rep_id, ca.revenue DESC;

+-------+--------------+---------+-------------+
| name | account_name | revenue | performance |
+-------+--------------+---------+-------------+
| Tom | CloudSys | 110000 | Met Quota |
| Tom | TechCorp | 85000 | Below Quota |
| Sarah | DataInc | 195000 | Met Quota |
| Sarah | AppDev | 140000 | Below Quota |
| Mike | NetSoft | 75000 | Below Quota |
| Lisa | WebCo | 225000 | Met Quota |
+-------+--------------+---------+-------------+

-- Join with expression conditions
SELECT
sr."name", ca.account_name,
ca.revenue, sr.quota,
ca.revenue - sr.quota as quota_diff
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id
AND UPPER(sr.region) = UPPER(ca.region)
ORDER BY quota_diff DESC, sr."name" ASC;

+-------+--------------+---------+--------+------------+
| name | account_name | revenue | quota | quota_diff |
+-------+--------------+---------+--------+------------+
| Lisa | WebCo | 225000 | 180000 | 45000 |
| Sarah | DataInc | 195000 | 150000 | 45000 |
| Tom | CloudSys | 110000 | 100000 | 10000 |
| Sarah | AppDev | 140000 | 150000 | -10000 |
| Tom | TechCorp | 85000 | 100000 | -15000 |
| Mike | NetSoft | 75000 | 120000 | -45000 |
+-------+--------------+---------+--------+------------+

-- Join with string pattern conditions
SELECT
sr."name", ca.account_name
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id
AND ca.account_name LIKE '%Corp%'
ORDER BY sr."name";

+------+--------------+
| name | account_name |
+------+--------------+
| Tom | TechCorp |
+------+--------------+

-- Complex nested join conditions
SELECT
sr."name", ca.account_name, ca.revenue
FROM sales_reps sr
INNER JOIN customer_accounts ca ON (
sr.rep_id = ca.rep_id
AND (ca.revenue > 100000 OR sr.quota < 130000)
AND sr.region IN ('North', 'South')
)
ORDER BY ca.revenue DESC;

+-------+--------------+---------+
| name | account_name | revenue |
+-------+--------------+---------+
| Sarah | DataInc | 195000 |
| Sarah | AppDev | 140000 |
| Tom | CloudSys | 110000 |
| Tom | TechCorp | 85000 |
+-------+--------------+---------+

DROP TABLE sales_reps;

Affected Rows: 0

DROP TABLE customer_accounts;

Affected Rows: 0

@@ -0,0 +1,83 @@
-- Migrated from DuckDB test: test/sql/join/ complex condition tests
-- Tests complex join conditions and predicates

CREATE TABLE sales_reps(rep_id INTEGER, "name" VARCHAR, region VARCHAR, quota INTEGER, ts TIMESTAMP TIME INDEX);

CREATE TABLE customer_accounts(account_id INTEGER, account_name VARCHAR, region VARCHAR, rep_id INTEGER, revenue INTEGER, ts TIMESTAMP TIME INDEX);

INSERT INTO sales_reps VALUES
(1, 'Tom', 'North', 100000, 1000), (2, 'Sarah', 'South', 150000, 2000),
(3, 'Mike', 'East', 120000, 3000), (4, 'Lisa', 'West', 180000, 4000);

INSERT INTO customer_accounts VALUES
(101, 'TechCorp', 'North', 1, 85000, 1000), (102, 'DataInc', 'South', 2, 195000, 2000),
(103, 'CloudSys', 'North', 1, 110000, 3000), (104, 'NetSoft', 'East', 3, 75000, 4000),
(105, 'WebCo', 'West', 4, 225000, 5000), (106, 'AppDev', 'South', 2, 140000, 6000);

-- Join with multiple conditions
SELECT
sr."name" as rep_name, ca.account_name, ca.revenue
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id AND sr.region = ca.region
ORDER BY sr.rep_id, ca.revenue DESC;

-- Join with inequality conditions
SELECT
sr."name", sr.quota, ca.account_name, ca.revenue
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id AND ca.revenue < sr.quota
ORDER BY sr.rep_id, ca.revenue;

-- Join with range conditions
SELECT
sr."name", ca.account_name, ca.revenue, sr.quota
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id
AND ca.revenue BETWEEN sr.quota * 0.5 AND sr.quota * 1.5
ORDER BY sr.rep_id, ca.revenue;

-- Join with CASE in conditions
SELECT
sr."name", ca.account_name, ca.revenue,
CASE WHEN ca.revenue >= sr.quota THEN 'Met Quota' ELSE 'Below Quota' END as performance
FROM sales_reps sr
INNER JOIN customer_accounts ca ON sr.rep_id = ca.rep_id
ORDER BY sr.rep_id, ca.revenue DESC;

-- Join with expression conditions
SELECT
sr."name", ca.account_name,
ca.revenue, sr.quota,
ca.revenue - sr.quota as quota_diff
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id
AND UPPER(sr.region) = UPPER(ca.region)
ORDER BY quota_diff DESC, sr."name" ASC;

-- Join with string pattern conditions
SELECT
sr."name", ca.account_name
FROM sales_reps sr
INNER JOIN customer_accounts ca
ON sr.rep_id = ca.rep_id
AND ca.account_name LIKE '%Corp%'
ORDER BY sr."name";

-- Complex nested join conditions
SELECT
sr."name", ca.account_name, ca.revenue
FROM sales_reps sr
INNER JOIN customer_accounts ca ON (
sr.rep_id = ca.rep_id
AND (ca.revenue > 100000 OR sr.quota < 130000)
AND sr.region IN ('North', 'South')
)
ORDER BY ca.revenue DESC;

DROP TABLE sales_reps;

DROP TABLE customer_accounts;

53
tests/cases/standalone/common/join/join_distinct.result
Normal file
@@ -0,0 +1,53 @@
-- Tests joins with DISTINCT operations
CREATE TABLE products_dist(prod_id INTEGER, prod_name VARCHAR, category VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE sales_dist(sale_id INTEGER, prod_id INTEGER, customer VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO products_dist VALUES (1, 'Widget', 'Tools', 1000), (2, 'Gadget', 'Electronics', 2000);

Affected Rows: 2

INSERT INTO sales_dist VALUES (1, 1, 'Alice', 1000), (2, 1, 'Bob', 2000), (3, 2, 'Alice', 3000), (4, 1, 'Alice', 4000);

Affected Rows: 4

SELECT DISTINCT p.category FROM products_dist p INNER JOIN sales_dist s ON p.prod_id = s.prod_id ORDER BY p.category;

+-------------+
| category |
+-------------+
| Electronics |
| Tools |
+-------------+

SELECT DISTINCT s.customer, p.category FROM sales_dist s INNER JOIN products_dist p ON s.prod_id = p.prod_id ORDER BY s.customer, p.category;

+----------+-------------+
| customer | category |
+----------+-------------+
| Alice | Electronics |
| Alice | Tools |
| Bob | Tools |
+----------+-------------+

SELECT p.prod_name, COUNT(DISTINCT s.customer) as unique_customers FROM products_dist p LEFT JOIN sales_dist s ON p.prod_id = s.prod_id GROUP BY p.prod_id, p.prod_name ORDER BY unique_customers DESC;

+-----------+------------------+
| prod_name | unique_customers |
+-----------+------------------+
| Widget | 2 |
| Gadget | 1 |
+-----------+------------------+

DROP TABLE products_dist;

Affected Rows: 0

DROP TABLE sales_dist;

Affected Rows: 0

18
tests/cases/standalone/common/join/join_distinct.sql
Normal file
@@ -0,0 +1,18 @@
-- Tests joins with DISTINCT operations

CREATE TABLE products_dist(prod_id INTEGER, prod_name VARCHAR, category VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE sales_dist(sale_id INTEGER, prod_id INTEGER, customer VARCHAR, ts TIMESTAMP TIME INDEX);

INSERT INTO products_dist VALUES (1, 'Widget', 'Tools', 1000), (2, 'Gadget', 'Electronics', 2000);

INSERT INTO sales_dist VALUES (1, 1, 'Alice', 1000), (2, 1, 'Bob', 2000), (3, 2, 'Alice', 3000), (4, 1, 'Alice', 4000);

SELECT DISTINCT p.category FROM products_dist p INNER JOIN sales_dist s ON p.prod_id = s.prod_id ORDER BY p.category;

SELECT DISTINCT s.customer, p.category FROM sales_dist s INNER JOIN products_dist p ON s.prod_id = p.prod_id ORDER BY s.customer, p.category;
SELECT p.prod_name, COUNT(DISTINCT s.customer) as unique_customers FROM products_dist p LEFT JOIN sales_dist s ON p.prod_id = s.prod_id GROUP BY p.prod_id, p.prod_name ORDER BY unique_customers DESC;

DROP TABLE products_dist;

DROP TABLE sales_dist;

62
tests/cases/standalone/common/join/join_edge_cases.result
Normal file
@@ -0,0 +1,62 @@
-- Tests join edge cases and special scenarios
CREATE TABLE empty_table("id" INTEGER, "value" VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE single_row("id" INTEGER, "data" VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE duplicate_keys("id" INTEGER, "description" VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO single_row VALUES (1, 'only_row', 1000);

Affected Rows: 1

INSERT INTO duplicate_keys VALUES (1, 'first', 1000), (1, 'second', 2000), (2, 'third', 3000), (2, 'fourth', 4000);

Affected Rows: 4

-- Join with empty table
SELECT s."id", s."data", e."value" FROM single_row s LEFT JOIN empty_table e ON s."id" = e."id" ORDER BY s."id";

+----+----------+-------+
| id | data | value |
+----+----------+-------+
| 1 | only_row | |
+----+----------+-------+

-- Join with duplicate keys
SELECT s."id", s."data", d."description" FROM single_row s LEFT JOIN duplicate_keys d ON s."id" = d."id" ORDER BY d."description";

+----+----------+-------------+
| id | data | description |
+----+----------+-------------+
| 1 | only_row | first |
| 1 | only_row | second |
+----+----------+-------------+

-- Self-join with duplicates
SELECT d1."description" as desc1, d2."description" as desc2 FROM duplicate_keys d1 INNER JOIN duplicate_keys d2 ON d1."id" = d2."id" AND d1.ts < d2.ts ORDER BY d1.ts, d2.ts;

+-------+--------+
| desc1 | desc2 |
+-------+--------+
| first | second |
| third | fourth |
+-------+--------+

DROP TABLE empty_table;

Affected Rows: 0

DROP TABLE single_row;

Affected Rows: 0

DROP TABLE duplicate_keys;

Affected Rows: 0

26
tests/cases/standalone/common/join/join_edge_cases.sql
Normal file
@@ -0,0 +1,26 @@
-- Tests join edge cases and special scenarios

CREATE TABLE empty_table("id" INTEGER, "value" VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE single_row("id" INTEGER, "data" VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE duplicate_keys("id" INTEGER, "description" VARCHAR, ts TIMESTAMP TIME INDEX);

INSERT INTO single_row VALUES (1, 'only_row', 1000);

INSERT INTO duplicate_keys VALUES (1, 'first', 1000), (1, 'second', 2000), (2, 'third', 3000), (2, 'fourth', 4000);

-- Join with empty table
SELECT s."id", s."data", e."value" FROM single_row s LEFT JOIN empty_table e ON s."id" = e."id" ORDER BY s."id";

-- Join with duplicate keys
SELECT s."id", s."data", d."description" FROM single_row s LEFT JOIN duplicate_keys d ON s."id" = d."id" ORDER BY d."description";

-- Self-join with duplicates
SELECT d1."description" as desc1, d2."description" as desc2 FROM duplicate_keys d1 INNER JOIN duplicate_keys d2 ON d1."id" = d2."id" AND d1.ts < d2.ts ORDER BY d1.ts, d2.ts;

DROP TABLE empty_table;

DROP TABLE single_row;

DROP TABLE duplicate_keys;

51
tests/cases/standalone/common/join/join_large_tables.result
Normal file
51
tests/cases/standalone/common/join/join_large_tables.result
Normal file
@@ -0,0 +1,51 @@
-- Tests joins with larger data sets
CREATE TABLE log_entries(log_id INTEGER, user_id INTEGER, "action" VARCHAR, timestamp_val BIGINT, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE user_profiles(user_id INTEGER, username VARCHAR, signup_date DATE, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO log_entries VALUES
(1, 1, 'login', 1700000000, 1000), (2, 1, 'view_page', 1700000060, 2000), (3, 2, 'login', 1700000120, 3000),
(4, 2, 'purchase', 1700000180, 4000), (5, 3, 'login', 1700000240, 5000), (6, 1, 'logout', 1700000300, 6000),
(7, 3, 'view_page', 1700000360, 7000), (8, 2, 'logout', 1700000420, 8000), (9, 3, 'purchase', 1700000480, 9000),
(10, 1, 'view_page', 1700000540, 10000), (11, 2, 'view_page', 1700000600, 11000), (12, 3, 'logout', 1700000660, 12000);

Affected Rows: 12

INSERT INTO user_profiles VALUES
(1, 'alice_user', '2022-01-15', 1000), (2, 'bob_user', '2022-03-20', 2000), (3, 'charlie_user', '2022-06-10', 3000);

Affected Rows: 3

SELECT u.username, COUNT(l.log_id) as activity_count, COUNT(DISTINCT l.action) as unique_actions FROM user_profiles u LEFT JOIN log_entries l ON u.user_id = l.user_id GROUP BY u.user_id, u.username ORDER BY activity_count DESC, u.username DESC;

+--------------+----------------+----------------+
| username     | activity_count | unique_actions |
+--------------+----------------+----------------+
| charlie_user | 4              | 4              |
| bob_user     | 4              | 4              |
| alice_user   | 4              | 3              |
+--------------+----------------+----------------+

SELECT l."action", COUNT(DISTINCT l.user_id) as unique_users, COUNT(*) as total_actions FROM log_entries l INNER JOIN user_profiles u ON l.user_id = u.user_id GROUP BY l."action" ORDER BY total_actions DESC, l."action" ASC;

+-----------+--------------+---------------+
| action    | unique_users | total_actions |
+-----------+--------------+---------------+
| view_page | 3            | 4             |
| login     | 3            | 3             |
| logout    | 3            | 3             |
| purchase  | 2            | 2             |
+-----------+--------------+---------------+

DROP TABLE log_entries;

Affected Rows: 0

DROP TABLE user_profiles;

Affected Rows: 0
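The first aggregation above keeps every user because of the LEFT JOIN. A hedged sketch (illustrative only, assuming the same log_entries and user_profiles tables created in this test) of how the same grouping could be narrowed to users with at least one purchase:

-- Illustrative sketch only: same join and grouping, filtered with HAVING on
-- a conditional count of 'purchase' actions.
SELECT u.username, COUNT(l.log_id) AS activity_count
FROM user_profiles u
INNER JOIN log_entries l ON u.user_id = l.user_id
GROUP BY u.user_id, u.username
HAVING SUM(CASE WHEN l."action" = 'purchase' THEN 1 ELSE 0 END) > 0
ORDER BY activity_count DESC, u.username;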
22
tests/cases/standalone/common/join/join_large_tables.sql
Normal file
@@ -0,0 +1,22 @@
-- Tests joins with larger data sets

CREATE TABLE log_entries(log_id INTEGER, user_id INTEGER, "action" VARCHAR, timestamp_val BIGINT, ts TIMESTAMP TIME INDEX);

CREATE TABLE user_profiles(user_id INTEGER, username VARCHAR, signup_date DATE, ts TIMESTAMP TIME INDEX);

INSERT INTO log_entries VALUES
(1, 1, 'login', 1700000000, 1000), (2, 1, 'view_page', 1700000060, 2000), (3, 2, 'login', 1700000120, 3000),
(4, 2, 'purchase', 1700000180, 4000), (5, 3, 'login', 1700000240, 5000), (6, 1, 'logout', 1700000300, 6000),
(7, 3, 'view_page', 1700000360, 7000), (8, 2, 'logout', 1700000420, 8000), (9, 3, 'purchase', 1700000480, 9000),
(10, 1, 'view_page', 1700000540, 10000), (11, 2, 'view_page', 1700000600, 11000), (12, 3, 'logout', 1700000660, 12000);

INSERT INTO user_profiles VALUES
(1, 'alice_user', '2022-01-15', 1000), (2, 'bob_user', '2022-03-20', 2000), (3, 'charlie_user', '2022-06-10', 3000);

SELECT u.username, COUNT(l.log_id) as activity_count, COUNT(DISTINCT l.action) as unique_actions FROM user_profiles u LEFT JOIN log_entries l ON u.user_id = l.user_id GROUP BY u.user_id, u.username ORDER BY activity_count DESC, u.username DESC;

SELECT l."action", COUNT(DISTINCT l.user_id) as unique_users, COUNT(*) as total_actions FROM log_entries l INNER JOIN user_profiles u ON l.user_id = u.user_id GROUP BY l."action" ORDER BY total_actions DESC, l."action" ASC;

DROP TABLE log_entries;

DROP TABLE user_profiles;
44
tests/cases/standalone/common/join/join_lateral.result
Normal file
@@ -0,0 +1,44 @@
-- Tests lateral join patterns and correlated subqueries
CREATE TABLE departments_lat(dept_id INTEGER, dept_name VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE employees_lat(emp_id INTEGER, dept_id INTEGER, salary INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO departments_lat VALUES (1, 'Engineering', 1000), (2, 'Sales', 2000), (3, 'Marketing', 3000);

Affected Rows: 3

INSERT INTO employees_lat VALUES (1, 1, 75000, 1000), (2, 1, 80000, 2000), (3, 2, 65000, 3000), (4, 2, 70000, 4000), (5, 3, 60000, 5000);

Affected Rows: 5

-- Correlated subquery simulating lateral join behavior
SELECT d.dept_name, top_earners.emp_id, top_earners.salary
FROM departments_lat d
INNER JOIN (
    SELECT emp_id, dept_id, salary, ROW_NUMBER() OVER (PARTITION BY dept_id ORDER BY salary DESC) as rn
    FROM employees_lat
) top_earners ON d.dept_id = top_earners.dept_id AND top_earners.rn <= 2
ORDER BY d.dept_id, top_earners.salary DESC;

+-------------+--------+--------+
| dept_name   | emp_id | salary |
+-------------+--------+--------+
| Engineering | 2      | 80000  |
| Engineering | 1      | 75000  |
| Sales       | 4      | 70000  |
| Sales       | 3      | 65000  |
| Marketing   | 5      | 60000  |
+-------------+--------+--------+

DROP TABLE departments_lat;

Affected Rows: 0

DROP TABLE employees_lat;

Affected Rows: 0
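The test emulates a lateral join with ROW_NUMBER() inside a derived table, which works on engines without LATERAL support. For comparison, a hedged sketch of the same top-two-per-department query written with explicit LATERAL syntax; whether GreptimeDB accepts this form is an assumption here, so it is shown for illustration only and is not part of the diff:

-- Illustrative sketch only: LATERAL formulation of the top-2-per-department
-- query, assuming an engine that supports CROSS JOIN LATERAL.
SELECT d.dept_name, top_earners.emp_id, top_earners.salary
FROM departments_lat d
CROSS JOIN LATERAL (
    SELECT e.emp_id, e.salary
    FROM employees_lat e
    WHERE e.dept_id = d.dept_id
    ORDER BY e.salary DESC
    LIMIT 2
) top_earners
ORDER BY d.dept_id, top_earners.salary DESC;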
22
tests/cases/standalone/common/join/join_lateral.sql
Normal file
@@ -0,0 +1,22 @@
-- Tests lateral join patterns and correlated subqueries

CREATE TABLE departments_lat(dept_id INTEGER, dept_name VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE employees_lat(emp_id INTEGER, dept_id INTEGER, salary INTEGER, ts TIMESTAMP TIME INDEX);

INSERT INTO departments_lat VALUES (1, 'Engineering', 1000), (2, 'Sales', 2000), (3, 'Marketing', 3000);

INSERT INTO employees_lat VALUES (1, 1, 75000, 1000), (2, 1, 80000, 2000), (3, 2, 65000, 3000), (4, 2, 70000, 4000), (5, 3, 60000, 5000);

-- Correlated subquery simulating lateral join behavior
SELECT d.dept_name, top_earners.emp_id, top_earners.salary
FROM departments_lat d
INNER JOIN (
    SELECT emp_id, dept_id, salary, ROW_NUMBER() OVER (PARTITION BY dept_id ORDER BY salary DESC) as rn
    FROM employees_lat
) top_earners ON d.dept_id = top_earners.dept_id AND top_earners.rn <= 2
ORDER BY d.dept_id, top_earners.salary DESC;

DROP TABLE departments_lat;

DROP TABLE employees_lat;
44
tests/cases/standalone/common/join/join_mixed_types.result
Normal file
@@ -0,0 +1,44 @@
-- Tests joins with mixed data types and conversions
CREATE TABLE numeric_keys(int_key INTEGER, float_key DOUBLE, str_val VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

CREATE TABLE string_keys(str_key VARCHAR, int_val INTEGER, desc_val VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO numeric_keys VALUES (1, 1.0, 'first', 1000), (2, 2.0, 'second', 2000), (3, 3.0, 'third', 3000);

Affected Rows: 3

INSERT INTO string_keys VALUES ('1', 100, 'hundred', 1000), ('2', 200, 'two_hundred', 2000), ('4', 400, 'four_hundred', 3000);

Affected Rows: 3

SELECT n.int_key, n.str_val, s.int_val, s.desc_val FROM numeric_keys n INNER JOIN string_keys s ON CAST(n.int_key AS VARCHAR) = s.str_key ORDER BY n.int_key;

+---------+---------+---------+-------------+
| int_key | str_val | int_val | desc_val    |
+---------+---------+---------+-------------+
| 1       | first   | 100     | hundred     |
| 2       | second  | 200     | two_hundred |
+---------+---------+---------+-------------+

SELECT n.float_key, s.str_key, s.int_val FROM numeric_keys n LEFT JOIN string_keys s ON n.float_key = CAST(s.str_key AS DOUBLE) ORDER BY n.float_key;

+-----------+---------+---------+
| float_key | str_key | int_val |
+-----------+---------+---------+
| 1.0       | 1       | 100     |
| 2.0       | 2       | 200     |
| 3.0       |         |         |
+-----------+---------+---------+

DROP TABLE numeric_keys;

Affected Rows: 0

DROP TABLE string_keys;

Affected Rows: 0
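Both queries above make the cast explicit in the join predicate, so the NULL row for 3.0 in the second result comes from a numeric key with no string-side counterpart. A hedged sketch (illustrative only, assuming the same numeric_keys and string_keys tables) that surfaces the reverse case, string keys such as '4' with no numeric counterpart:

-- Illustrative sketch only: string-side keys with no numeric match,
-- reusing the explicit CAST from the tests above.
SELECT s.str_key, s.desc_val
FROM string_keys s
LEFT JOIN numeric_keys n ON CAST(n.int_key AS VARCHAR) = s.str_key
WHERE n.int_key IS NULL
ORDER BY s.str_key;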
17
tests/cases/standalone/common/join/join_mixed_types.sql
Normal file
@@ -0,0 +1,17 @@
-- Tests joins with mixed data types and conversions

CREATE TABLE numeric_keys(int_key INTEGER, float_key DOUBLE, str_val VARCHAR, ts TIMESTAMP TIME INDEX);

CREATE TABLE string_keys(str_key VARCHAR, int_val INTEGER, desc_val VARCHAR, ts TIMESTAMP TIME INDEX);

INSERT INTO numeric_keys VALUES (1, 1.0, 'first', 1000), (2, 2.0, 'second', 2000), (3, 3.0, 'third', 3000);

INSERT INTO string_keys VALUES ('1', 100, 'hundred', 1000), ('2', 200, 'two_hundred', 2000), ('4', 400, 'four_hundred', 3000);

SELECT n.int_key, n.str_val, s.int_val, s.desc_val FROM numeric_keys n INNER JOIN string_keys s ON CAST(n.int_key AS VARCHAR) = s.str_key ORDER BY n.int_key;

SELECT n.float_key, s.str_key, s.int_val FROM numeric_keys n LEFT JOIN string_keys s ON n.float_key = CAST(s.str_key AS DOUBLE) ORDER BY n.float_key;

DROP TABLE numeric_keys;

DROP TABLE string_keys;
Some files were not shown because too many files have changed in this diff.