Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-06 05:12:54 +00:00)

Compare commits: 20 commits, v0.8.0-nig...v0.8.0
| Author | SHA1 | Date |
|---|---|---|
| | 258675b75e | |
| | 11a08cb272 | |
| | e9b178b8b9 | |
| | 3477fde0e5 | |
| | 9baa431656 | |
| | e2a1cb5840 | |
| | f696f41a02 | |
| | 0168d43d60 | |
| | e372e25e30 | |
| | ca409a732f | |
| | 5c0a530ad1 | |
| | 4b030456f6 | |
| | f93b5b19f0 | |
| | 669a6d84e9 | |
| | a45017ad71 | |
| | 0d9e71b653 | |
| | 93f178f3ad | |
| | 9f4a6c6fe2 | |
| | c915916b62 | |
| | dff7ba7598 | |
@@ -59,6 +59,9 @@ runs:
     if: ${{ inputs.disable-run-tests == 'false' }}
     shell: pwsh
     run: make test sqlness-test
+    env:
+      RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
+      RUST_BACKTRACE: 1

   - name: Upload sqlness logs
     if: ${{ failure() }} # Only upload logs when the integration tests failed.

1  .github/workflows/nightly-ci.yml  (vendored)

@@ -104,6 +104,7 @@ jobs:
       CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link"
       RUST_BACKTRACE: 1
       CARGO_INCREMENTAL: 0
+      RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
       GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
       GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
       GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}

2  .github/workflows/release.yml  (vendored)

@@ -91,7 +91,7 @@ env:
   # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
   NIGHTLY_RELEASE_PREFIX: nightly
   # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
-  NEXT_RELEASE_VERSION: v0.8.0
+  NEXT_RELEASE_VERSION: v0.9.0

 jobs:
   allocate-runners:
158  Cargo.lock  (generated)

Every workspace crate recorded in the lock file is bumped from 0.7.2 to 0.8.0. Each of these hunks has the same shape, for example:

 [[package]]
 name = "api"
-version = "0.7.2"
+version = "0.8.0"

The same version bump applies to: auth, benchmarks, cache, catalog, client, cmd, common-base, common-catalog, common-config, common-datasource, common-decimal, common-error, common-frontend, common-function, common-greptimedb-telemetry, common-grpc, common-grpc-expr, common-macro, common-mem-prof, common-meta, common-plugins, common-procedure, common-procedure-test, common-query, common-recordbatch, common-runtime, common-telemetry, common-test-util, common-time, common-version, common-wal, datanode, datatypes, file-engine, flow, frontend, index, log-store, meta-client, meta-srv, metric-engine, mito2, object-store, operator, partition, plugins, promql, puffin, query, script, servers, session, sql, sqlness-runner, store-api, substrait, table, tests-fuzz, and tests-integration. Dependency entries follow suit, e.g. "substrait 0.7.2" becomes "substrait 0.8.0" in client, cmd, datanode, flow, operator, query, and tests-integration.

The pinned sqlparser fork (and its sqlparser_derive 0.2.2) moves to a new revision:

-source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0#c919990bf62ad38d2b0c0a3bc90b26ad919d51b0"
+source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d#e4e496b8d62416ad50ce70a1b460c7313610cf5d"

and every "sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990b...)" dependency string in common-query, frontend, operator, partition, sql, and tests-fuzz is updated to the new rev accordingly.

Dependency lists also change: cmd gains "common-grpc" and "flow"; flow gains three new entries including "common-frontend" and "common-function"; tests-integration gains "flow".
@@ -64,7 +64,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.7.2"
+version = "0.8.0"
 edition = "2021"
 license = "Apache-2.0"

@@ -159,7 +159,7 @@ smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
 sysinfo = "0.30"
 # on branch v0.44.x
-sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "c919990bf62ad38d2b0c0a3bc90b26ad919d51b0", features = [
+sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e4e496b8d62416ad50ce70a1b460c7313610cf5d", features = [
     "visitor",
 ] }
 strum = { version = "0.25", features = ["derive"] }
@@ -186,7 +186,6 @@
 | `meta_client.metadata_cache_tti` | String | `5m` | -- |
 | `datanode` | -- | -- | Datanode options. |
 | `datanode.client` | -- | -- | Datanode client options. |
-| `datanode.client.timeout` | String | `10s` | -- |
 | `datanode.client.connect_timeout` | String | `10s` | -- |
 | `datanode.client.tcp_nodelay` | Bool | `true` | -- |
 | `logging` | -- | -- | The logging options. |

@@ -136,7 +136,6 @@ metadata_cache_tti = "5m"
 [datanode]
 ## Datanode client options.
 [datanode.client]
-timeout = "10s"
 connect_timeout = "10s"
 tcp_nodelay = true
@@ -18,7 +18,7 @@ use common_error::ext::{BoxedError, ErrorExt};
 use common_error::status_code::StatusCode;
 use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
 use common_macro::stack_trace_debug;
-use snafu::{Location, Snafu};
+use snafu::{location, Location, Snafu};
 use tonic::{Code, Status};

 #[derive(Snafu)]

@@ -83,14 +83,28 @@ pub enum Error {
     },

     #[snafu(display("Failed to request RegionServer, code: {}", code))]
-    RegionServer { code: Code, source: BoxedError },
+    RegionServer {
+        code: Code,
+        source: BoxedError,
+        #[snafu(implicit)]
+        location: Location,
+    },

     // Server error carried in Tonic Status's metadata.
     #[snafu(display("{}", msg))]
-    Server { code: StatusCode, msg: String },
+    Server {
+        code: StatusCode,
+        msg: String,
+        #[snafu(implicit)]
+        location: Location,
+    },

     #[snafu(display("Illegal Database response: {err_msg}"))]
-    IllegalDatabaseResponse { err_msg: String },
+    IllegalDatabaseResponse {
+        err_msg: String,
+        #[snafu(implicit)]
+        location: Location,
+    },

     #[snafu(display("Failed to send request with streaming: {}", err_msg))]
     ClientStreaming {

@@ -148,7 +162,11 @@ impl From<Status> for Error {
         let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
             .unwrap_or_else(|| e.message().to_string());

-        Self::Server { code, msg }
+        Self::Server {
+            code,
+            msg,
+            location: location!(),
+        }
     }
 }

@@ -189,6 +189,7 @@ impl RegionRequester {
                 error::Error::RegionServer {
                     code,
                     source: BoxedError::new(err),
+                    location: location!(),
                 }
             })?
             .into_inner();

@@ -272,7 +273,7 @@ mod test {
                 err_msg: "blabla".to_string(),
             }),
         }));
-        let Server { code, msg } = result.unwrap_err() else {
+        let Server { code, msg, .. } = result.unwrap_err() else {
            unreachable!()
        };
        assert_eq!(code, StatusCode::Internal);
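Note: the error changes above all follow one pattern: each variant gains a `#[snafu(implicit)] location: Location` field so the error records where it was created. When a variant is built through its generated context selector the location is captured automatically; a directly constructed variant (as in the `From<Status>` impl and the `RegionServer` call site above) has to supply it with `location!()`. A minimal, self-contained sketch of that behavior — not GreptimeDB code — assuming snafu 0.8:

```rust
use snafu::{location, Location, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("server error: {msg}"))]
    Server {
        msg: String,
        #[snafu(implicit)]
        location: Location,
    },
}

fn via_selector() -> Error {
    // The context selector fills in the implicit location for us.
    ServerSnafu { msg: "boom" }.build()
}

fn direct() -> Error {
    // Direct struct construction has no selector, so the location is supplied
    // explicitly with the `location!()` macro.
    Error::Server {
        msg: "boom".to_string(),
        location: location!(),
    }
}
```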
@@ -28,6 +28,7 @@ common-base.workspace = true
 common-catalog.workspace = true
 common-config.workspace = true
 common-error.workspace = true
+common-grpc.workspace = true
 common-macro.workspace = true
 common-meta.workspace = true
 common-procedure.workspace = true

@@ -45,6 +46,7 @@ datatypes.workspace = true
 either = "1.8"
 etcd-client.workspace = true
 file-engine.workspace = true
+flow.workspace = true
 frontend.workspace = true
 futures.workspace = true
 human-panic = "1.2.2"

@@ -64,6 +64,10 @@ impl App for Instance {
         self.tool.do_work().await
     }

+    fn wait_signal(&self) -> bool {
+        false
+    }
+
     async fn stop(&self) -> Result<()> {
         Ok(())
     }
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashSet;
 use std::path::Path;
 use std::sync::Arc;

@@ -28,6 +29,7 @@ use snafu::{OptionExt, ResultExt};
 use tokio::fs::File;
 use tokio::io::{AsyncWriteExt, BufWriter};
 use tokio::sync::Semaphore;
+use tokio::time::Instant;

 use crate::cli::{Instance, Tool};
 use crate::error::{

@@ -174,8 +176,34 @@ impl Export {
     }

     /// Return a list of [`TableReference`] to be exported.
-    /// Includes all tables under the given `catalog` and `schema`
-    async fn get_table_list(&self, catalog: &str, schema: &str) -> Result<Vec<TableReference>> {
+    /// Includes all tables under the given `catalog` and `schema`.
+    async fn get_table_list(
+        &self,
+        catalog: &str,
+        schema: &str,
+    ) -> Result<(Vec<TableReference>, Vec<TableReference>)> {
+        // Puts all metric table first
+        let sql = format!(
+            "select table_catalog, table_schema, table_name from \
+            information_schema.columns where column_name = '__tsid' \
+            and table_catalog = \'{catalog}\' and table_schema = \'{schema}\'"
+        );
+        let result = self.sql(&sql).await?;
+        let Some(records) = result else {
+            EmptyResultSnafu.fail()?
+        };
+        let mut metric_physical_tables = HashSet::with_capacity(records.len());
+        for value in records {
+            let mut t = Vec::with_capacity(3);
+            for v in &value {
+                let serde_json::Value::String(value) = v else {
+                    unreachable!()
+                };
+                t.push(value);
+            }
+            metric_physical_tables.insert((t[0].clone(), t[1].clone(), t[2].clone()));
+        }
+
         // TODO: SQL injection hurts
         let sql = format!(
             "select table_catalog, table_schema, table_name from \

@@ -190,10 +218,10 @@ impl Export {
         debug!("Fetched table list: {:?}", records);

         if records.is_empty() {
-            return Ok(vec![]);
+            return Ok((vec![], vec![]));
         }

-        let mut result = Vec::with_capacity(records.len());
+        let mut remaining_tables = Vec::with_capacity(records.len());
         for value in records {
             let mut t = Vec::with_capacity(3);
             for v in &value {

@@ -202,10 +230,17 @@ impl Export {
                 };
                 t.push(value);
             }
-            result.push((t[0].clone(), t[1].clone(), t[2].clone()));
+            let table = (t[0].clone(), t[1].clone(), t[2].clone());
+            // Ignores the physical table
+            if !metric_physical_tables.contains(&table) {
+                remaining_tables.push(table);
+            }
         }

-        Ok(result)
+        Ok((
+            metric_physical_tables.into_iter().collect(),
+            remaining_tables,
+        ))
     }

     async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {

@@ -225,6 +260,7 @@ impl Export {
     }

     async fn export_create_table(&self) -> Result<()> {
+        let timer = Instant::now();
         let semaphore = Arc::new(Semaphore::new(self.parallelism));
         let db_names = self.iter_db_names().await?;
         let db_count = db_names.len();

@@ -233,15 +269,16 @@ impl Export {
             let semaphore_moved = semaphore.clone();
             tasks.push(async move {
                 let _permit = semaphore_moved.acquire().await.unwrap();
-                let table_list = self.get_table_list(&catalog, &schema).await?;
-                let table_count = table_list.len();
+                let (metric_physical_tables, remaining_tables) =
+                    self.get_table_list(&catalog, &schema).await?;
+                let table_count = metric_physical_tables.len() + remaining_tables.len();
                 tokio::fs::create_dir_all(&self.output_dir)
                     .await
                     .context(FileIoSnafu)?;
                 let output_file =
                     Path::new(&self.output_dir).join(format!("{catalog}-{schema}.sql"));
                 let mut file = File::create(output_file).await.context(FileIoSnafu)?;
-                for (c, s, t) in table_list {
+                for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
                     match self.show_create_table(&c, &s, &t).await {
                         Err(e) => {
                             error!(e; r#"Failed to export table "{}"."{}"."{}""#, c, s, t)

@@ -270,12 +307,14 @@ impl Export {
             })
             .count();

-        info!("success {success}/{db_count} jobs");
+        let elapsed = timer.elapsed();
+        info!("Success {success}/{db_count} jobs, cost: {:?}", elapsed);

         Ok(())
     }

     async fn export_table_data(&self) -> Result<()> {
+        let timer = Instant::now();
         let semaphore = Arc::new(Semaphore::new(self.parallelism));
         let db_names = self.iter_db_names().await?;
         let db_count = db_names.len();

@@ -288,15 +327,25 @@ impl Export {
                     .await
                     .context(FileIoSnafu)?;
                 let output_dir = Path::new(&self.output_dir).join(format!("{catalog}-{schema}/"));

-                // copy database to
-                let sql = format!(
-                    "copy database {} to '{}' with (format='parquet');",
-                    schema,
-                    output_dir.to_str().unwrap()
-                );
-                self.sql(&sql).await?;
-                info!("finished exporting {catalog}.{schema} data");
+                // Ignores metric physical tables
+                let (metrics_tables, table_list) = self.get_table_list(&catalog, &schema).await?;
+                for (_, _, table_name) in metrics_tables {
+                    warn!("Ignores metric physical table: {table_name}");
+                }
+                for (catalog_name, schema_name, table_name) in table_list {
+                    // copy table to
+                    let sql = format!(
+                        r#"Copy "{}"."{}"."{}" TO '{}{}.parquet' WITH (format='parquet');"#,
+                        catalog_name,
+                        schema_name,
+                        table_name,
+                        output_dir.to_str().unwrap(),
+                        table_name,
+                    );
+                    info!("Executing sql: {sql}");
+                    self.sql(&sql).await?;
+                }
+                info!("Finished exporting {catalog}.{schema} data");

                 // export copy from sql
                 let dir_filenames = match output_dir.read_dir() {

@@ -351,8 +400,8 @@ impl Export {
                 }
             })
             .count();

-        info!("success {success}/{db_count} jobs");
+        let elapsed = timer.elapsed();
+        info!("Success {success}/{db_count} jobs, costs: {:?}", elapsed);

         Ok(())
     }
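Note: with this change `export_table_data` no longer issues a single `COPY DATABASE` statement; it emits one `COPY ... TO ... WITH (format='parquet')` statement per non-metric table. A small illustration of the statement the `format!` call above renders, using made-up catalog, schema, table, and output-directory values:

```rust
fn rendered_copy_statement() -> String {
    // Hypothetical values for illustration only.
    let (catalog_name, schema_name, table_name) = ("greptime", "public", "monitor");
    let output_dir = "/tmp/export/greptime-public/";
    let sql = format!(
        r#"Copy "{}"."{}"."{}" TO '{}{}.parquet' WITH (format='parquet');"#,
        catalog_name, schema_name, table_name, output_dir, table_name,
    );
    assert_eq!(
        sql,
        r#"Copy "greptime"."public"."monitor" TO '/tmp/export/greptime-public/monitor.parquet' WITH (format='parquet');"#
    );
    sql
}
```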
@@ -24,6 +24,7 @@ use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager, Me
 use clap::Parser;
 use client::client_manager::DatanodeClients;
 use common_config::Configurable;
+use common_grpc::channel_manager::ChannelConfig;
 use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
 use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
 use common_meta::heartbeat::handler::HandlerGroupExecutor;

@@ -318,11 +319,19 @@ impl StartCommand {
             Arc::new(executor),
         );

+        // frontend to datanode need not timeout.
+        // Some queries are expected to take long time.
+        let channel_config = ChannelConfig {
+            timeout: None,
+            ..Default::default()
+        };
+        let client = DatanodeClients::new(channel_config);
+
         let mut instance = FrontendBuilder::new(
             cached_meta_backend.clone(),
             layered_cache_registry.clone(),
             catalog_manager,
-            Arc::new(DatanodeClients::default()),
+            Arc::new(client),
             meta_client,
         )
         .with_plugin(plugins.clone())
@@ -41,6 +41,11 @@ pub trait App: Send {

     async fn start(&mut self) -> error::Result<()>;

+    /// Waits the quit signal by default.
+    fn wait_signal(&self) -> bool {
+        true
+    }
+
     async fn stop(&self) -> error::Result<()>;
 }

@@ -51,11 +56,13 @@ pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {

     app.start().await?;

-    if let Err(e) = tokio::signal::ctrl_c().await {
-        error!("Failed to listen for ctrl-c signal: {}", e);
-        // It's unusual to fail to listen for ctrl-c signal, maybe there's something unexpected in
-        // the underlying system. So we stop the app instead of running nonetheless to let people
-        // investigate the issue.
+    if app.wait_signal() {
+        if let Err(e) = tokio::signal::ctrl_c().await {
+            error!("Failed to listen for ctrl-c signal: {}", e);
+            // It's unusual to fail to listen for ctrl-c signal, maybe there's something unexpected in
+            // the underlying system. So we stop the app instead of running nonetheless to let people
+            // investigate the issue.
+        }
     }

     app.stop().await?;
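Note: `wait_signal` is a trait method with a default body, so existing server-style `App` implementations keep blocking on Ctrl-C while one-shot tools (like the cli `Instance` above) override it to return `false` and exit once their work is done. A simplified, self-contained sketch of the control flow — not the actual `App` trait, which has more methods:

```rust
#[async_trait::async_trait]
trait MiniApp: Send {
    async fn start(&mut self) -> Result<(), String>;

    /// Long-running servers keep the default and block until Ctrl-C;
    /// one-shot tools override this to return `false`.
    fn wait_signal(&self) -> bool {
        true
    }

    async fn stop(&self) -> Result<(), String>;
}

async fn run(mut app: Box<dyn MiniApp>) -> Result<(), String> {
    app.start().await?;
    if app.wait_signal() {
        // A failed listener is only logged in the real code; either way we
        // fall through to `stop()`.
        let _ = tokio::signal::ctrl_c().await;
    }
    app.stop().await
}
```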
@@ -45,6 +45,7 @@ use common_wal::config::StandaloneWalConfig;
 use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
 use datanode::datanode::{Datanode, DatanodeBuilder};
 use file_engine::config::EngineConfig as FileEngineConfig;
+use flow::FlownodeBuilder;
 use frontend::frontend::FrontendOptions;
 use frontend::instance::builder::FrontendBuilder;
 use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};

@@ -426,11 +427,26 @@ impl StartCommand {
         )
         .await;

+        let table_metadata_manager =
+            Self::create_table_metadata_manager(kv_backend.clone()).await?;
+
+        let flow_builder = FlownodeBuilder::new(
+            1,
+            Default::default(),
+            fe_plugins.clone(),
+            table_metadata_manager.clone(),
+            catalog_manager.clone(),
+        );
+        let flownode = Arc::new(flow_builder.build().await);
+
         let builder =
             DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
         let datanode = builder.build().await.context(StartDatanodeSnafu)?;

-        let node_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));
+        let node_manager = Arc::new(StandaloneDatanodeManager {
+            region_server: datanode.region_server(),
+            flow_server: flownode.clone(),
+        });

         let table_id_sequence = Arc::new(
             SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())

@@ -448,8 +464,6 @@ impl StartCommand {
             opts.wal.into(),
             kv_backend.clone(),
         ));
-        let table_metadata_manager =
-            Self::create_table_metadata_manager(kv_backend.clone()).await?;
         let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
         let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
             table_id_sequence,

@@ -482,6 +496,13 @@ impl StartCommand {
             .await
             .context(StartFrontendSnafu)?;

+        // flow server need to be able to use frontend to write insert requests back
+        flownode
+            .set_frontend_invoker(Box::new(frontend.clone()))
+            .await;
+        // TODO(discord9): unify with adding `start` and `shutdown` method to flownode too.
+        let _handle = flownode.clone().run_background();
+
         let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
             .build()
             .await
@@ -291,88 +291,68 @@ impl ChannelConfig {
     }

     /// A timeout to each request.
-    pub fn timeout(self, timeout: Duration) -> Self {
-        Self {
-            timeout: Some(timeout),
-            ..self
-        }
+    pub fn timeout(mut self, timeout: Duration) -> Self {
+        self.timeout = Some(timeout);
+        self
     }

     /// A timeout to connecting to the uri.
     ///
     /// Defaults to no timeout.
-    pub fn connect_timeout(self, timeout: Duration) -> Self {
-        Self {
-            connect_timeout: Some(timeout),
-            ..self
-        }
+    pub fn connect_timeout(mut self, timeout: Duration) -> Self {
+        self.connect_timeout = Some(timeout);
+        self
     }

     /// A concurrency limit to each request.
-    pub fn concurrency_limit(self, limit: usize) -> Self {
-        Self {
-            concurrency_limit: Some(limit),
-            ..self
-        }
+    pub fn concurrency_limit(mut self, limit: usize) -> Self {
+        self.concurrency_limit = Some(limit);
+        self
     }

     /// A rate limit to each request.
-    pub fn rate_limit(self, limit: u64, duration: Duration) -> Self {
-        Self {
-            rate_limit: Some((limit, duration)),
-            ..self
-        }
+    pub fn rate_limit(mut self, limit: u64, duration: Duration) -> Self {
+        self.rate_limit = Some((limit, duration));
+        self
     }

     /// Sets the SETTINGS_INITIAL_WINDOW_SIZE option for HTTP2 stream-level flow control.
     /// Default is 65,535
-    pub fn initial_stream_window_size(self, size: u32) -> Self {
-        Self {
-            initial_stream_window_size: Some(size),
-            ..self
-        }
+    pub fn initial_stream_window_size(mut self, size: u32) -> Self {
+        self.initial_stream_window_size = Some(size);
+        self
     }

     /// Sets the max connection-level flow control for HTTP2
     ///
     /// Default is 65,535
-    pub fn initial_connection_window_size(self, size: u32) -> Self {
-        Self {
-            initial_connection_window_size: Some(size),
-            ..self
-        }
+    pub fn initial_connection_window_size(mut self, size: u32) -> Self {
+        self.initial_connection_window_size = Some(size);
+        self
     }

     /// Set http2 KEEP_ALIVE_INTERVAL. Uses hyper’s default otherwise.
-    pub fn http2_keep_alive_interval(self, duration: Duration) -> Self {
-        Self {
-            http2_keep_alive_interval: Some(duration),
-            ..self
-        }
+    pub fn http2_keep_alive_interval(mut self, duration: Duration) -> Self {
+        self.http2_keep_alive_interval = Some(duration);
+        self
     }

     /// Set http2 KEEP_ALIVE_TIMEOUT. Uses hyper’s default otherwise.
-    pub fn http2_keep_alive_timeout(self, duration: Duration) -> Self {
-        Self {
-            http2_keep_alive_timeout: Some(duration),
-            ..self
-        }
+    pub fn http2_keep_alive_timeout(mut self, duration: Duration) -> Self {
+        self.http2_keep_alive_timeout = Some(duration);
+        self
     }

     /// Set http2 KEEP_ALIVE_WHILE_IDLE. Uses hyper’s default otherwise.
-    pub fn http2_keep_alive_while_idle(self, enabled: bool) -> Self {
-        Self {
-            http2_keep_alive_while_idle: Some(enabled),
-            ..self
-        }
+    pub fn http2_keep_alive_while_idle(mut self, enabled: bool) -> Self {
+        self.http2_keep_alive_while_idle = Some(enabled);
+        self
     }

     /// Sets whether to use an adaptive flow control. Uses hyper’s default otherwise.
-    pub fn http2_adaptive_window(self, enabled: bool) -> Self {
-        Self {
-            http2_adaptive_window: Some(enabled),
-            ..self
-        }
+    pub fn http2_adaptive_window(mut self, enabled: bool) -> Self {
+        self.http2_adaptive_window = Some(enabled);
+        self
     }

     /// Set whether TCP keepalive messages are enabled on accepted connections.

@@ -381,31 +361,25 @@ impl ChannelConfig {
     /// will be the time to remain idle before sending TCP keepalive probes.
     ///
     /// Default is no keepalive (None)
-    pub fn tcp_keepalive(self, duration: Duration) -> Self {
-        Self {
-            tcp_keepalive: Some(duration),
-            ..self
-        }
+    pub fn tcp_keepalive(mut self, duration: Duration) -> Self {
+        self.tcp_keepalive = Some(duration);
+        self
     }

     /// Set the value of TCP_NODELAY option for accepted connections.
     ///
     /// Enabled by default.
-    pub fn tcp_nodelay(self, enabled: bool) -> Self {
-        Self {
-            tcp_nodelay: enabled,
-            ..self
-        }
+    pub fn tcp_nodelay(mut self, enabled: bool) -> Self {
+        self.tcp_nodelay = enabled;
+        self
     }

     /// Set the value of tls client auth.
     ///
     /// Disabled by default.
-    pub fn client_tls_config(self, client_tls_option: ClientTlsOption) -> Self {
-        Self {
-            client_tls: Some(client_tls_option),
-            ..self
-        }
+    pub fn client_tls_config(mut self, client_tls_option: ClientTlsOption) -> Self {
+        self.client_tls = Some(client_tls_option);
+        self
     }
 }
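Note: each `ChannelConfig` setter now takes `mut self` and assigns the field in place instead of rebuilding the whole struct with functional-update (`..self`) syntax. The public signatures still take and return `Self` by value, so chained call sites compile unchanged. A minimal sketch, using only method names from the diff above and assuming the type's `Default` impl (which the frontend change earlier relies on):

```rust
use std::time::Duration;

use common_grpc::channel_manager::ChannelConfig;

fn example_config() -> ChannelConfig {
    // Same chaining style before and after the refactor.
    ChannelConfig::default()
        .connect_timeout(Duration::from_secs(1))
        .tcp_nodelay(true)
}
```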
@@ -119,12 +119,11 @@ impl CreateFlowProcedure {
             &sink_table_name.table_name,
         ))
         .await?;
-        ensure!(
-            !exists,
-            error::TableAlreadyExistsSnafu {
-                table_name: sink_table_name.to_string(),
-            }
-        );
+        // TODO(discord9): due to undefined behavior in flow's plan in how to transform types in mfp, sometime flow can't deduce correct schema
+        // and require manually create sink table
+        if exists {
+            common_telemetry::warn!("Table already exists, table: {}", sink_table_name);
+        }

         self.collect_source_tables().await?;
         self.allocate_flow_id().await?;
@@ -516,6 +516,7 @@ mod tests {
     use common_meta::key::datanode_table::DatanodeTableManager;
     use common_meta::kv_backend::memory::MemoryKvBackend;
     use common_meta::kv_backend::KvBackendRef;
+    use mito2::engine::MITO_ENGINE_NAME;
     use store_api::region_request::RegionRequest;
     use store_api::storage::RegionId;

@@ -528,7 +529,7 @@ mod tests {
         let txn = mgr
             .build_create_txn(
                 1028,
-                "mock",
+                MITO_ENGINE_NAME,
                 "foo/bar/weny",
                 HashMap::from([("foo".to_string(), "bar".to_string())]),
                 HashMap::default(),

@@ -542,8 +543,9 @@ mod tests {

     #[tokio::test]
     async fn test_initialize_region_server() {
+        common_telemetry::init_default_ut_logging();
         let mut mock_region_server = mock_region_server();
-        let (mock_region, mut mock_region_handler) = MockRegionEngine::new();
+        let (mock_region, mut mock_region_handler) = MockRegionEngine::new(MITO_ENGINE_NAME);

         mock_region_server.register_engine(mock_region.clone());
@@ -121,6 +121,7 @@ mod tests {
     use std::time::Duration;

     use common_meta::instruction::{InstructionReply, UpgradeRegion};
+    use mito2::engine::MITO_ENGINE_NAME;
     use store_api::region_engine::RegionRole;
     use store_api::storage::RegionId;
     use tokio::time::Instant;

@@ -133,7 +134,7 @@ mod tests {
     #[tokio::test]
     async fn test_region_not_exist() {
         let mut mock_region_server = mock_region_server();
-        let (mock_engine, _) = MockRegionEngine::new();
+        let (mock_engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
         mock_region_server.register_engine(mock_engine);

         let handler_context = HandlerContext {

@@ -167,13 +168,14 @@ mod tests {
         let mock_region_server = mock_region_server();
         let region_id = RegionId::new(1024, 1);

-        let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
-            region_engine.mock_role = Some(Some(RegionRole::Leader));
-            region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
-                // Should be unreachable.
-                unreachable!();
-            }));
-        });
+        let (mock_engine, _) =
+            MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
+                region_engine.mock_role = Some(Some(RegionRole::Leader));
+                region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
+                    // Should be unreachable.
+                    unreachable!();
+                }));
+            });
         mock_region_server.register_test_region(region_id, mock_engine);

         let handler_context = HandlerContext {

@@ -207,13 +209,14 @@ mod tests {
         let mock_region_server = mock_region_server();
         let region_id = RegionId::new(1024, 1);

-        let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
-            // Region is not ready.
-            region_engine.mock_role = Some(Some(RegionRole::Follower));
-            region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
-            // Note: Don't change.
-            region_engine.handle_request_delay = Some(Duration::from_secs(100));
-        });
+        let (mock_engine, _) =
+            MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
+                // Region is not ready.
+                region_engine.mock_role = Some(Some(RegionRole::Follower));
+                region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
+                // Note: Don't change.
+                region_engine.handle_request_delay = Some(Duration::from_secs(100));
+            });
         mock_region_server.register_test_region(region_id, mock_engine);

         let handler_context = HandlerContext {

@@ -247,13 +250,14 @@ mod tests {
         let mock_region_server = mock_region_server();
         let region_id = RegionId::new(1024, 1);

-        let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
-            // Region is not ready.
-            region_engine.mock_role = Some(Some(RegionRole::Follower));
-            region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
-            // Note: Don't change.
-            region_engine.handle_request_delay = Some(Duration::from_millis(300));
-        });
+        let (mock_engine, _) =
+            MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
+                // Region is not ready.
+                region_engine.mock_role = Some(Some(RegionRole::Follower));
+                region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
+                // Note: Don't change.
+                region_engine.handle_request_delay = Some(Duration::from_millis(300));
+            });
         mock_region_server.register_test_region(region_id, mock_engine);

         let waits = vec![

@@ -308,18 +312,19 @@ mod tests {
         let mock_region_server = mock_region_server();
         let region_id = RegionId::new(1024, 1);

-        let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
-            // Region is not ready.
-            region_engine.mock_role = Some(Some(RegionRole::Follower));
-            region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
-                error::UnexpectedSnafu {
-                    violated: "mock_error".to_string(),
-                }
-                .fail()
-            }));
-            // Note: Don't change.
-            region_engine.handle_request_delay = Some(Duration::from_millis(100));
-        });
+        let (mock_engine, _) =
+            MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
+                // Region is not ready.
+                region_engine.mock_role = Some(Some(RegionRole::Follower));
+                region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
+                    error::UnexpectedSnafu {
+                        violated: "mock_error".to_string(),
+                    }
+                    .fail()
+                }));
+                // Note: Don't change.
+                region_engine.handle_request_delay = Some(Duration::from_millis(100));
+            });
         mock_region_server.register_test_region(region_id, mock_engine);

         let handler_context = HandlerContext {
@@ -34,6 +34,7 @@ use common_telemetry::{info, warn};
 use dashmap::DashMap;
 use futures_util::future::try_join_all;
 use metric_engine::engine::MetricEngine;
+use mito2::engine::MITO_ENGINE_NAME;
 use prost::Message;
 pub use query::dummy_catalog::{
     DummyCatalogList, DummyTableProviderFactory, TableProviderFactoryRef,

@@ -44,7 +45,9 @@ use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
 use servers::grpc::region_server::RegionServerHandler;
 use session::context::{QueryContextBuilder, QueryContextRef};
 use snafu::{OptionExt, ResultExt};
-use store_api::metric_engine_consts::{METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY};
+use store_api::metric_engine_consts::{
+    FILE_ENGINE_NAME, LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME,
+};
 use store_api::region_engine::{RegionEngineRef, RegionRole, SetReadonlyResponse};
 use store_api::region_request::{AffectedRows, RegionCloseRequest, RegionRequest};
 use store_api::storage::RegionId;

@@ -403,7 +406,7 @@ impl RegionServerInner {
         let current_region_status = self.region_map.get(&region_id);

         let engine = match region_change {
-            RegionChange::Register(ref engine_type, _) => match current_region_status {
+            RegionChange::Register(attribute) => match current_region_status {
                 Some(status) => match status.clone() {
                     RegionEngineWithStatus::Registering(_) => {
                         return Ok(CurrentEngine::EarlyReturn(0))

@@ -417,8 +420,10 @@ impl RegionServerInner {
                     .engines
                     .read()
                     .unwrap()
-                    .get(engine_type)
-                    .with_context(|| RegionEngineNotFoundSnafu { name: engine_type })?
+                    .get(attribute.engine())
+                    .with_context(|| RegionEngineNotFoundSnafu {
+                        name: attribute.engine(),
+                    })?
                     .clone(),
             },
             RegionChange::Deregisters => match current_region_status {

@@ -461,11 +466,13 @@ impl RegionServerInner {
             .start_timer();

         let region_change = match &request {
-            RegionRequest::Create(create) => RegionChange::Register(create.engine.clone(), false),
+            RegionRequest::Create(create) => {
+                let attribute = parse_region_attribute(&create.engine, &create.options)?;
+                RegionChange::Register(attribute)
+            }
             RegionRequest::Open(open) => {
-                let is_opening_physical_region =
-                    open.options.contains_key(PHYSICAL_TABLE_METADATA_KEY);
-                RegionChange::Register(open.engine.clone(), is_opening_physical_region)
+                let attribute = parse_region_attribute(&open.engine, &open.options)?;
+                RegionChange::Register(attribute)
             }
             RegionRequest::Close(_) | RegionRequest::Drop(_) => RegionChange::Deregisters,
             RegionRequest::Put(_)

@@ -514,7 +521,7 @@ impl RegionServerInner {
         region_change: &RegionChange,
     ) {
         match region_change {
-            RegionChange::Register(_, _) => {
+            RegionChange::Register(_) => {
                 self.region_map.insert(
                     region_id,
                     RegionEngineWithStatus::Registering(engine.clone()),

@@ -533,7 +540,7 @@ impl RegionServerInner {
     fn unset_region_status(&self, region_id: RegionId, region_change: RegionChange) {
         match region_change {
             RegionChange::None => {}
-            RegionChange::Register(_, _) | RegionChange::Deregisters => {
+            RegionChange::Register(_) | RegionChange::Deregisters => {
                 self.region_map.remove(&region_id);
             }
         }

@@ -548,15 +555,28 @@ impl RegionServerInner {
         let engine_type = engine.name();
         match region_change {
             RegionChange::None => {}
-            RegionChange::Register(_, is_opening_physical_region) => {
-                if is_opening_physical_region {
-                    self.register_logical_regions(&engine, region_id).await?;
-                }
-
-                info!("Region {region_id} is registered to engine {engine_type}");
+            RegionChange::Register(attribute) => {
+                info!(
+                    "Region {region_id} is registered to engine {}",
+                    attribute.engine()
+                );
                 self.region_map
-                    .insert(region_id, RegionEngineWithStatus::Ready(engine));
-                self.event_listener.on_region_registered(region_id);
+                    .insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
+
+                match attribute {
+                    RegionAttribute::Metric { physical } => {
+                        if physical {
+                            // Registers the logical regions belong to the physical region (`region_id`).
+                            self.register_logical_regions(&engine, region_id).await?;
+                            // We only send the `on_region_registered` event of the physical region.
+                            self.event_listener.on_region_registered(region_id);
+                        }
+                    }
+                    RegionAttribute::Mito => self.event_listener.on_region_registered(region_id),
+                    RegionAttribute::File => {
+                        // do nothing
+                    }
+                }
             }
             RegionChange::Deregisters => {
                 info!("Region {region_id} is deregistered from engine {engine_type}");

@@ -699,10 +719,45 @@ impl RegionServerInner {

 enum RegionChange {
     None,
-    Register(String, bool),
+    Register(RegionAttribute),
     Deregisters,
 }

+fn parse_region_attribute(
+    engine: &str,
+    options: &HashMap<String, String>,
+) -> Result<RegionAttribute> {
+    match engine {
+        MITO_ENGINE_NAME => Ok(RegionAttribute::Mito),
+        METRIC_ENGINE_NAME => {
+            let physical = !options.contains_key(LOGICAL_TABLE_METADATA_KEY);
+
+            Ok(RegionAttribute::Metric { physical })
+        }
+        FILE_ENGINE_NAME => Ok(RegionAttribute::File),
+        _ => error::UnexpectedSnafu {
+            violated: format!("Unknown engine: {}", engine),
+        }
+        .fail(),
+    }
+}
+
+enum RegionAttribute {
+    Mito,
+    Metric { physical: bool },
+    File,
+}
+
+impl RegionAttribute {
+    fn engine(&self) -> &'static str {
+        match self {
+            RegionAttribute::Mito => MITO_ENGINE_NAME,
+            RegionAttribute::Metric { .. } => METRIC_ENGINE_NAME,
+            RegionAttribute::File => FILE_ENGINE_NAME,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {

@@ -723,7 +778,7 @@ mod tests {
         common_telemetry::init_default_ut_logging();

         let mut mock_region_server = mock_region_server();
-        let (engine, _receiver) = MockRegionEngine::new();
+        let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
         let engine_name = engine.name();

         mock_region_server.register_engine(engine.clone());

@@ -781,7 +836,7 @@ mod tests {
         common_telemetry::init_default_ut_logging();

         let mut mock_region_server = mock_region_server();
-        let (engine, _receiver) = MockRegionEngine::new();
+        let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);

         mock_region_server.register_engine(engine.clone());

@@ -832,7 +887,7 @@ mod tests {
         common_telemetry::init_default_ut_logging();

         let mut mock_region_server = mock_region_server();
-        let (engine, _receiver) = MockRegionEngine::new();
+        let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);

         mock_region_server.register_engine(engine.clone());

@@ -857,13 +912,15 @@ mod tests {
         common_telemetry::init_default_ut_logging();

         let mut mock_region_server = mock_region_server();
-        let (engine, _receiver) =
-            MockRegionEngine::with_mock_fn(Box::new(|_region_id, _request| {
+        let (engine, _receiver) = MockRegionEngine::with_mock_fn(
+            MITO_ENGINE_NAME,
+            Box::new(|_region_id, _request| {
                 error::UnexpectedSnafu {
                     violated: "test".to_string(),
                 }
                 .fail()
-            }));
+            }),
+        );

         mock_region_server.register_engine(engine.clone());

@@ -904,7 +961,7 @@ mod tests {
         common_telemetry::init_default_ut_logging();

         let mut mock_region_server = mock_region_server();
-        let (engine, _) = MockRegionEngine::new();
+        let (engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
         mock_region_server.register_engine(engine.clone());

         let region_id = RegionId::new(1024, 1);

@@ -950,7 +1007,7 @@ mod tests {
             CurrentEngineTest {
                 region_id,
                 current_region_status: None,
-                region_change: RegionChange::Register(engine.name().to_string(), false),
+                region_change: RegionChange::Register(RegionAttribute::Mito),
                 assert: Box::new(|result| {
                     let current_engine = result.unwrap();
                     assert_matches!(current_engine, CurrentEngine::Engine(_));

@@ -959,7 +1016,7 @@ mod tests {
             CurrentEngineTest {
                 region_id,
                 current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
-                region_change: RegionChange::Register(engine.name().to_string(), false),
+                region_change: RegionChange::Register(RegionAttribute::Mito),
                 assert: Box::new(|result| {
                     let current_engine = result.unwrap();
                     assert_matches!(current_engine, CurrentEngine::EarlyReturn(_));

@@ -968,7 +1025,7 @@ mod tests {
             CurrentEngineTest {
                 region_id,
                 current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
-                region_change: RegionChange::Register(engine.name().to_string(), false),
+                region_change: RegionChange::Register(RegionAttribute::Mito),
                 assert: Box::new(|result| {
                     let err = result.unwrap_err();
                     assert_eq!(err.status_code(), StatusCode::RegionBusy);

@@ -977,7 +1034,7 @@ mod tests {
             CurrentEngineTest {
                 region_id,
                 current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
-                region_change: RegionChange::Register(engine.name().to_string(), false),
+                region_change: RegionChange::Register(RegionAttribute::Mito),
                 assert: Box::new(|result| {
                     let current_engine = result.unwrap();
                     assert_matches!(current_engine, CurrentEngine::Engine(_));
@@ -106,10 +106,11 @@ pub struct MockRegionEngine {
|
||||
pub(crate) handle_request_delay: Option<Duration>,
|
||||
pub(crate) handle_request_mock_fn: Option<MockRequestHandler>,
|
||||
pub(crate) mock_role: Option<Option<RegionRole>>,
|
||||
engine: String,
|
||||
}
|
||||
|
||||
impl MockRegionEngine {
|
||||
pub fn new() -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
|
||||
pub fn new(engine: &str) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(8);
|
||||
|
||||
(
|
||||
@@ -118,12 +119,14 @@ impl MockRegionEngine {
|
||||
sender: tx,
|
||||
handle_request_mock_fn: None,
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
}),
|
||||
rx,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn with_mock_fn(
|
||||
engine: &str,
|
||||
mock_fn: MockRequestHandler,
|
||||
) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(8);
|
||||
@@ -134,12 +137,16 @@ impl MockRegionEngine {
|
||||
sender: tx,
|
||||
handle_request_mock_fn: Some(mock_fn),
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
}),
|
||||
rx,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn with_custom_apply_fn<F>(apply: F) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>)
|
||||
pub fn with_custom_apply_fn<F>(
|
||||
engine: &str,
|
||||
apply: F,
|
||||
) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>)
|
||||
where
|
||||
F: FnOnce(&mut MockRegionEngine),
|
||||
{
|
||||
@@ -149,6 +156,7 @@ impl MockRegionEngine {
|
||||
sender: tx,
|
||||
handle_request_mock_fn: None,
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
};
|
||||
|
||||
apply(&mut region_engine);
|
||||
@@ -160,7 +168,7 @@ impl MockRegionEngine {
|
||||
#[async_trait::async_trait]
|
||||
impl RegionEngine for MockRegionEngine {
|
||||
fn name(&self) -> &str {
|
||||
"mock"
|
||||
&self.engine
|
||||
}
|
||||
|
||||
async fn handle_request(
|
||||
|
||||
@@ -26,7 +26,10 @@ futures = "0.3"
# This fork simply keeps our dependency in our org and pins the version;
# it is the same as the upstream repo.
async-trait.workspace = true
common-function.workspace = true
common-meta.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
enum-as-inner = "0.6.0"
greptime-proto.workspace = true
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }

@@ -14,19 +14,686 @@
|
||||
|
||||
//! For getting data from sources and sending results to sinks,
//! and for communicating with other parts of the database.
|
||||
#![warn(unused_imports)]
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Instant, SystemTime};
|
||||
|
||||
use api::v1::{RowDeleteRequest, RowDeleteRequests, RowInsertRequest, RowInsertRequests};
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_base::Plugins;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_frontend::handler::FrontendInvoker;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_runtime::JoinHandle;
|
||||
use common_telemetry::{debug, info};
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use datatypes::value::Value;
|
||||
use greptime_proto::v1;
|
||||
use itertools::Itertools;
|
||||
use query::{QueryEngine, QueryEngineFactory};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use session::context::QueryContext;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ConcreteDataType, RegionId};
|
||||
use table::metadata::TableId;
|
||||
use tokio::sync::{oneshot, watch, Mutex, RwLock};
|
||||
|
||||
use crate::adapter::error::{ExternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
|
||||
pub(crate) use crate::adapter::node_context::FlownodeContext;
|
||||
use crate::adapter::parse_expr::parse_fixed;
|
||||
use crate::adapter::table_source::TableSource;
|
||||
use crate::adapter::util::column_schemas_to_proto;
|
||||
use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
|
||||
use crate::compute::ErrCollector;
|
||||
use crate::expr::GlobalId;
|
||||
use crate::repr::{self, DiffRow, Row};
|
||||
use crate::transform::{register_function_to_query_engine, sql_to_flow_plan};
|
||||
|
||||
pub(crate) mod error;
|
||||
pub(crate) mod node_context;
|
||||
mod table_source;
|
||||
mod flownode_impl;
|
||||
mod parse_expr;
|
||||
mod server;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
mod util;
|
||||
|
||||
pub(crate) use node_context::FlownodeContext;
|
||||
pub(crate) use table_source::TableSource;
|
||||
|
||||
mod worker;
|
||||
|
||||
pub(crate) mod node_context;
|
||||
mod table_source;
|
||||
|
||||
use error::Error;

pub const PER_REQ_MAX_ROW_CNT: usize = 8192;

// TODO: refactor common types for flow to a separate module
/// FlowId is a unique identifier for a flow task
pub type FlowId = u32;
pub type FlowId = u64;
pub type TableName = [String; 3];

/// Options for flow node
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct FlownodeOptions {
    /// rpc address
    pub rpc_addr: String,
}
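As a rough illustration of how `#[serde(default)]` behaves for this options struct, the standalone sketch below (assuming `serde` with the derive feature and the `toml` crate; the address is just an example) shows missing fields falling back to `Default`:

use serde::{Deserialize, Serialize};

#[derive(Clone, Default, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct FlownodeOptions {
    pub rpc_addr: String,
}

fn main() {
    // An empty config is valid: every field takes its Default value.
    let empty: FlownodeOptions = toml::from_str("").unwrap();
    assert_eq!(empty.rpc_addr, "");

    let set: FlownodeOptions = toml::from_str(r#"rpc_addr = "127.0.0.1:6800""#).unwrap();
    assert_eq!(set.rpc_addr, "127.0.0.1:6800");
}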
|
||||
|
||||
/// Flownode Builder
|
||||
pub struct FlownodeBuilder {
|
||||
flow_node_id: u32,
|
||||
opts: FlownodeOptions,
|
||||
plugins: Plugins,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
}
|
||||
|
||||
impl FlownodeBuilder {
|
||||
/// init flownode builder
|
||||
pub fn new(
|
||||
flow_node_id: u32,
|
||||
opts: FlownodeOptions,
|
||||
plugins: Plugins,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
flow_node_id,
|
||||
opts,
|
||||
plugins,
|
||||
table_meta,
|
||||
catalog_manager,
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO(discord9): error handling
|
||||
pub async fn build(self) -> FlownodeManager {
|
||||
let query_engine_factory = QueryEngineFactory::new_with_plugins(
|
||||
// query engine in flownode only translate plan with resolved table source.
|
||||
self.catalog_manager.clone(),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
self.plugins.clone(),
|
||||
);
|
||||
let query_engine = query_engine_factory.query_engine();
|
||||
|
||||
register_function_to_query_engine(&query_engine);
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
let node_id = Some(self.flow_node_id);
|
||||
|
||||
let _handle = std::thread::spawn(move || {
|
||||
let (flow_node_manager, mut worker) =
|
||||
FlownodeManager::new_with_worker(node_id, query_engine, self.table_meta.clone());
|
||||
let _ = tx.send(flow_node_manager);
|
||||
info!("Flow Worker started in new thread");
|
||||
worker.run();
|
||||
});
|
||||
let man = rx.await.unwrap();
|
||||
info!("Flow Node Manager started");
|
||||
man
|
||||
}
|
||||
}
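`FlownodeBuilder::build` spawns the `!Send` worker on a dedicated OS thread and hands the manager back through a oneshot channel. A simplified, self-contained sketch of that pattern (assuming `tokio`; the names are placeholders, not the real types):

use std::rc::Rc;

use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    let (tx, rx) = oneshot::channel::<String>();

    std::thread::spawn(move || {
        // Stand-in for the hydroflow worker: `Rc` makes this state !Send,
        // so it must stay on this thread for its whole lifetime.
        let worker_state = Rc::new("dataflow worker");
        let _ = tx.send(format!("handle for {worker_state}"));
        // ... the worker loop (`worker.run()`) would block here ...
    });

    // The async caller only ever sees the Send handle.
    let handle = rx.await.unwrap();
    println!("{handle}");
}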
|
||||
|
||||
/// Arc-ed FlownodeManager, cheaper to clone
pub type FlownodeManagerRef = Arc<FlownodeManager>;

/// FlownodeManager manages the state of all tasks in the flow node; they should all run on the same thread
|
||||
///
|
||||
/// The choice of timestamp is just using current system timestamp for now
|
||||
pub struct FlownodeManager {
|
||||
/// The handler to the worker that will run the dataflow
|
||||
/// which is `!Send` so a handle is used
|
||||
pub worker_handles: Vec<Mutex<WorkerHandle>>,
|
||||
/// The query engine that will be used to parse the query and convert it to a dataflow plan
|
||||
query_engine: Arc<dyn QueryEngine>,
|
||||
/// Getting table name and table schema from table info manager
|
||||
table_info_source: TableSource,
|
||||
frontend_invoker: RwLock<Option<Box<dyn FrontendInvoker + Send + Sync>>>,
|
||||
/// contains mapping from table name to global id, and table schema
|
||||
node_context: Mutex<FlownodeContext>,
|
||||
flow_err_collectors: RwLock<BTreeMap<FlowId, ErrCollector>>,
|
||||
src_send_buf_lens: RwLock<BTreeMap<TableId, watch::Receiver<usize>>>,
|
||||
tick_manager: FlowTickManager,
|
||||
node_id: Option<u32>,
|
||||
}
|
||||
|
||||
/// Building FlownodeManager
|
||||
impl FlownodeManager {
|
||||
/// set frontend invoker
|
||||
pub async fn set_frontend_invoker(
|
||||
self: &Arc<Self>,
|
||||
frontend: Box<dyn FrontendInvoker + Send + Sync>,
|
||||
) {
|
||||
*self.frontend_invoker.write().await = Some(frontend);
|
||||
}
|
||||
|
||||
/// Create **without** setting `frontend_invoker`
|
||||
pub fn new(
|
||||
node_id: Option<u32>,
|
||||
query_engine: Arc<dyn QueryEngine>,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
) -> Self {
|
||||
let srv_map = TableSource::new(
|
||||
table_meta.table_info_manager().clone(),
|
||||
table_meta.table_name_manager().clone(),
|
||||
);
|
||||
let node_context = FlownodeContext::default();
|
||||
let tick_manager = FlowTickManager::new();
|
||||
let worker_handles = Vec::new();
|
||||
FlownodeManager {
|
||||
worker_handles,
|
||||
query_engine,
|
||||
table_info_source: srv_map,
|
||||
frontend_invoker: RwLock::new(None),
|
||||
node_context: Mutex::new(node_context),
|
||||
flow_err_collectors: Default::default(),
|
||||
src_send_buf_lens: Default::default(),
|
||||
tick_manager,
|
||||
node_id,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a flownode manager with one worker
|
||||
pub fn new_with_worker<'s>(
|
||||
node_id: Option<u32>,
|
||||
query_engine: Arc<dyn QueryEngine>,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
) -> (Self, Worker<'s>) {
|
||||
let mut zelf = Self::new(node_id, query_engine, table_meta);
|
||||
let (handle, worker) = create_worker();
|
||||
zelf.add_worker_handle(handle);
|
||||
(zelf, worker)
|
||||
}
|
||||
|
||||
/// Add a worker handle to the manager, meaning the corresponding worker is under its management
|
||||
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
|
||||
self.worker_handles.push(Mutex::new(handle));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
pub enum DiffRequest {
    Insert(Vec<(Row, repr::Timestamp)>),
    Delete(Vec<(Row, repr::Timestamp)>),
}

/// Iterate through the diff rows and batch consecutive rows with the same diff type into requests
pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
    let mut reqs = Vec::new();
    for (row, ts, diff) in rows {
        let last = reqs.last_mut();
        match (last, diff) {
            (Some(DiffRequest::Insert(rows)), 1) => {
                rows.push((row, ts));
            }
            (Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
            (Some(DiffRequest::Delete(rows)), -1) => {
                rows.push((row, ts));
            }
            (Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
            (None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
            (None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
            _ => {}
        }
    }
    reqs
}
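The grouping rule above can be read as: extend the current batch while the diff sign stays the same, and cut a new request whenever it flips. A simplified standalone sketch with plain types (not the crate's `Row`/`DiffRow`):

type Row = Vec<i64>;
type Timestamp = i64;

#[derive(Debug)]
enum DiffRequest {
    Insert(Vec<(Row, Timestamp)>),
    Delete(Vec<(Row, Timestamp)>),
}

fn group(rows: Vec<(Row, Timestamp, i64)>) -> Vec<DiffRequest> {
    let mut reqs: Vec<DiffRequest> = Vec::new();
    for (row, ts, diff) in rows {
        let last = reqs.last_mut();
        match (last, diff) {
            // same sign as the current batch: extend it
            (Some(DiffRequest::Insert(batch)), 1) => batch.push((row, ts)),
            (Some(DiffRequest::Delete(batch)), -1) => batch.push((row, ts)),
            // sign flipped (or first row): start a new request
            (_, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
            (_, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
            // other diff values are ignored, as in the function above
            _ => {}
        }
    }
    reqs
}

fn main() {
    // diffs +1, +1, -1, +1 batch into Insert[2 rows], Delete[1 row], Insert[1 row]
    let out = group(vec![
        (vec![1], 0, 1),
        (vec![2], 0, 1),
        (vec![2], 1, -1),
        (vec![3], 2, 1),
    ]);
    assert_eq!(out.len(), 3);
    println!("{out:?}");
}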
|
||||
|
||||
/// This impl block contains methods to send writeback requests to frontend
|
||||
impl FlownodeManager {
|
||||
/// TODO(discord9): merge all diff rows of the same type into one request
|
||||
///
|
||||
/// Return the number of requests it made
|
||||
pub async fn send_writeback_requests(&self) -> Result<usize, Error> {
|
||||
let all_reqs = self.generate_writeback_request().await;
|
||||
if all_reqs.is_empty() || all_reqs.iter().all(|v| v.1.is_empty()) {
|
||||
return Ok(0);
|
||||
}
|
||||
let mut req_cnt = 0;
|
||||
for (table_name, reqs) in all_reqs {
|
||||
if reqs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let (catalog, schema) = (table_name[0].clone(), table_name[1].clone());
|
||||
let ctx = Arc::new(QueryContext::with(&catalog, &schema));
|
||||
// TODO(discord9): instead of auto-building the table from the request schema, actually build the table
// before `create flow` so that pk, ts, etc. can be assigned.
|
||||
let (primary_keys, schema, is_auto_create) = if let Some(table_id) = self
|
||||
.table_info_source
|
||||
.get_table_id_from_name(&table_name)
|
||||
.await?
|
||||
{
|
||||
let table_info = self
|
||||
.table_info_source
|
||||
.get_table_info_value(&table_id)
|
||||
.await?
|
||||
.unwrap();
|
||||
let meta = table_info.table_info.meta;
|
||||
let primary_keys = meta
|
||||
.primary_key_indices
|
||||
.into_iter()
|
||||
.map(|i| meta.schema.column_schemas[i].name.clone())
|
||||
.collect_vec();
|
||||
let schema = meta.schema.column_schemas;
|
||||
let is_auto_create = schema
|
||||
.last()
|
||||
.map(|s| s.name == "__ts_placeholder")
|
||||
.unwrap_or(false);
|
||||
(primary_keys, schema, is_auto_create)
|
||||
} else {
|
||||
// TODO(discord9): consider removing the buggy auto-create-by-schema path
|
||||
|
||||
let node_ctx = self.node_context.lock().await;
|
||||
let gid: GlobalId = node_ctx
|
||||
.table_repr
|
||||
.get_by_name(&table_name)
|
||||
.map(|x| x.1)
|
||||
.unwrap();
|
||||
let schema = node_ctx
|
||||
.schema
|
||||
.get(&gid)
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
name: format!("Table name = {:?}", table_name),
|
||||
})?
|
||||
.clone();
|
||||
// TODO(discord9): use default key from schema
|
||||
let primary_keys = schema
|
||||
.keys
|
||||
.first()
|
||||
.map(|v| {
|
||||
v.column_indices
|
||||
.iter()
|
||||
.map(|i| format!("Col_{i}"))
|
||||
.collect_vec()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let update_at = ColumnSchema::new(
|
||||
"update_at",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
);
|
||||
// TODO(discord9): currently we can't infer the time index from the flow plan, so we have to set one manually
|
||||
let ts_col = ColumnSchema::new(
|
||||
"__ts_placeholder",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_time_index(true);
|
||||
|
||||
let wout_ts = schema
|
||||
.column_types
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(idx, typ)| {
|
||||
ColumnSchema::new(format!("Col_{idx}"), typ.scalar_type, typ.nullable)
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
let mut with_ts = wout_ts.clone();
|
||||
with_ts.push(update_at);
|
||||
with_ts.push(ts_col);
|
||||
|
||||
(primary_keys, with_ts, true)
|
||||
};
|
||||
|
||||
let proto_schema = column_schemas_to_proto(schema, &primary_keys)?;
|
||||
|
||||
debug!(
|
||||
"Sending {} writeback requests to table {}, reqs={:?}",
|
||||
reqs.len(),
|
||||
table_name.join("."),
|
||||
reqs
|
||||
);
|
||||
let now = SystemTime::now();
|
||||
let now = now
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.map(|s| s.as_millis() as repr::Timestamp)
|
||||
.unwrap_or_else(|_| {
|
||||
-(SystemTime::UNIX_EPOCH
|
||||
.duration_since(now)
|
||||
.unwrap()
|
||||
.as_millis() as repr::Timestamp)
|
||||
});
|
||||
for req in reqs {
|
||||
match req {
|
||||
DiffRequest::Insert(insert) => {
|
||||
let rows_proto: Vec<v1::Row> = insert
|
||||
.into_iter()
|
||||
.map(|(mut row, _ts)| {
|
||||
// `update_at` col
|
||||
row.extend([Value::from(common_time::Timestamp::new_millisecond(
|
||||
now,
|
||||
))]);
|
||||
// ts col, if auto create
|
||||
if is_auto_create {
|
||||
row.extend([Value::from(
|
||||
common_time::Timestamp::new_millisecond(0),
|
||||
)]);
|
||||
}
|
||||
row.into()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let table_name = table_name.last().unwrap().clone();
|
||||
let req = RowInsertRequest {
|
||||
table_name,
|
||||
rows: Some(v1::Rows {
|
||||
schema: proto_schema.clone(),
|
||||
rows: rows_proto,
|
||||
}),
|
||||
};
|
||||
req_cnt += 1;
|
||||
self.frontend_invoker
|
||||
.read()
|
||||
.await
|
||||
.as_ref()
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
reason: "Expect a frontend invoker for flownode to write back",
|
||||
})?
|
||||
.row_inserts(RowInsertRequests { inserts: vec![req] }, ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| ExternalSnafu {})?;
|
||||
}
|
||||
DiffRequest::Delete(remove) => {
|
||||
info!("original remove rows={:?}", remove);
|
||||
let rows_proto: Vec<v1::Row> = remove
|
||||
.into_iter()
|
||||
.map(|(mut row, _ts)| {
|
||||
row.extend(Some(Value::from(
|
||||
common_time::Timestamp::new_millisecond(0),
|
||||
)));
|
||||
row.into()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let table_name = table_name.last().unwrap().clone();
|
||||
let req = RowDeleteRequest {
|
||||
table_name,
|
||||
rows: Some(v1::Rows {
|
||||
schema: proto_schema.clone(),
|
||||
rows: rows_proto,
|
||||
}),
|
||||
};
|
||||
|
||||
req_cnt += 1;
|
||||
self.frontend_invoker
|
||||
.read()
|
||||
.await
|
||||
.as_ref()
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
reason: "Expect a frontend invoker for flownode to write back",
|
||||
})?
|
||||
.row_deletes(RowDeleteRequests { deletes: vec![req] }, ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| ExternalSnafu {})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(req_cnt)
|
||||
}
|
||||
|
||||
/// Generate writeback requests for all sink tables
|
||||
pub async fn generate_writeback_request(&self) -> BTreeMap<TableName, Vec<DiffRequest>> {
|
||||
let mut output = BTreeMap::new();
|
||||
for (name, sink_recv) in self
|
||||
.node_context
|
||||
.lock()
|
||||
.await
|
||||
.sink_receiver
|
||||
.iter_mut()
|
||||
.map(|(n, (_s, r))| (n, r))
|
||||
{
|
||||
let mut rows = Vec::new();
|
||||
while let Ok(row) = sink_recv.try_recv() {
|
||||
rows.push(row);
|
||||
}
|
||||
let reqs = diff_row_to_request(rows);
|
||||
output.insert(name.clone(), reqs);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
/// Flow Runtime related methods
|
||||
impl FlownodeManager {
|
||||
/// run in common_runtime background runtime
|
||||
pub fn run_background(self: Arc<Self>) -> JoinHandle<()> {
|
||||
info!("Starting flownode manager's background task");
|
||||
common_runtime::spawn_bg(async move {
|
||||
self.run().await;
|
||||
})
|
||||
}
|
||||
|
||||
/// log all flow errors
|
||||
pub async fn log_all_errors(&self) {
|
||||
for (f_id, f_err) in self.flow_err_collectors.read().await.iter() {
|
||||
let all_errors = f_err.get_all().await;
|
||||
if !all_errors.is_empty() {
|
||||
let all_errors = all_errors
|
||||
.into_iter()
|
||||
.map(|i| format!("{:?}", i))
|
||||
.join("\n");
|
||||
common_telemetry::error!("Flow {} has following errors: {}", f_id, all_errors);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Trigger dataflow running, and then send writeback request to the source sender
|
||||
///
|
||||
/// Note that this method does not handle input mirror requests; those are handled by the grpc server
|
||||
pub async fn run(&self) {
|
||||
debug!("Starting to run");
|
||||
loop {
|
||||
// TODO(discord9): only run when new inputs arrive or scheduled to
|
||||
self.run_available().await.unwrap();
|
||||
// TODO(discord9): error handling
|
||||
self.send_writeback_requests().await.unwrap();
|
||||
self.log_all_errors().await;
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Run all available subgraph in the flow node
|
||||
/// This will try to run all dataflow in this node
|
||||
///
|
||||
/// However, this is not blocking and can sometimes return while the actual computation is still running in the worker thread
|
||||
/// TODO(discord9): add flag for subgraph that have input since last run
|
||||
pub async fn run_available(&self) -> Result<(), Error> {
|
||||
let now = self.tick_manager.tick();
|
||||
|
||||
loop {
|
||||
for worker in self.worker_handles.iter() {
|
||||
// TODO(discord9): consider how to handle error in individual worker
|
||||
worker.lock().await.run_available(now).await.unwrap();
|
||||
}
|
||||
// first check how many inputs were sent
|
||||
let send_cnt = match self.node_context.lock().await.flush_all_sender() {
|
||||
Ok(cnt) => cnt,
|
||||
Err(err) => {
|
||||
common_telemetry::error!("Flush send buf errors: {:?}", err);
|
||||
break;
|
||||
}
|
||||
};
|
||||
// if no inputs
|
||||
if send_cnt == 0 {
|
||||
break;
|
||||
} else {
|
||||
debug!("FlownodeManager::run_available: send_cnt={}", send_cnt);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
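The loop in `run_available` keeps alternating between running the workers and flushing the source send buffers until a flush reports zero newly sent rows. A stripped-down sketch of that drain pattern (closures stand in for the worker and sender calls; not part of the diff):

fn drain(mut run_workers: impl FnMut(), mut flush_senders: impl FnMut() -> usize) {
    loop {
        run_workers();
        let sent = flush_senders();
        if sent == 0 {
            break; // no new input arrived while the workers were running
        }
    }
}

fn main() {
    // Pretend three batches of buffered input arrive, then the buffers are empty.
    let mut pending = vec![3usize, 2, 1];
    drain(|| { /* run dataflow workers */ }, move || pending.pop().unwrap_or(0));
    println!("all source buffers drained");
}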
|
||||
|
||||
/// send write request to related source sender
|
||||
pub async fn handle_write_request(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
rows: Vec<DiffRow>,
|
||||
) -> Result<(), Error> {
|
||||
debug!(
|
||||
"Handling write request for region_id={:?} with {} rows",
|
||||
region_id,
|
||||
rows.len()
|
||||
);
|
||||
let table_id = region_id.table_id();
|
||||
self.node_context.lock().await.send(table_id, rows)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Create&Remove flow
|
||||
impl FlownodeManager {
|
||||
/// Remove a flow by its id
|
||||
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
|
||||
for handle in self.worker_handles.iter() {
|
||||
let handle = handle.lock().await;
|
||||
if handle.contains_flow(flow_id).await? {
|
||||
handle.remove_flow(flow_id).await?;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return task id if a new task is created, otherwise return None
|
||||
///
|
||||
/// steps to create task:
|
||||
/// 1. parse the query into a typed plan (and optionally parse the `expire_when` expr)
/// 2. render the source/sink with the output table id and the input table ids used
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn create_flow(
|
||||
&self,
|
||||
flow_id: FlowId,
|
||||
sink_table_name: TableName,
|
||||
source_table_ids: &[TableId],
|
||||
create_if_not_exist: bool,
|
||||
expire_when: Option<String>,
|
||||
comment: Option<String>,
|
||||
sql: String,
|
||||
flow_options: HashMap<String, String>,
|
||||
query_ctx: Option<QueryContext>,
|
||||
) -> Result<Option<FlowId>, Error> {
|
||||
if create_if_not_exist {
|
||||
// check if the task already exists
|
||||
for handle in self.worker_handles.iter() {
|
||||
if handle.lock().await.contains_flow(flow_id).await? {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut node_ctx = self.node_context.lock().await;
|
||||
// assign global id to source and sink table
|
||||
for source in source_table_ids {
|
||||
node_ctx
|
||||
.assign_global_id_to_table(&self.table_info_source, None, Some(*source))
|
||||
.await?;
|
||||
}
|
||||
node_ctx
|
||||
.assign_global_id_to_table(&self.table_info_source, Some(sink_table_name.clone()), None)
|
||||
.await?;
|
||||
|
||||
node_ctx.register_task_src_sink(flow_id, source_table_ids, sink_table_name.clone());
|
||||
|
||||
node_ctx.query_context = query_ctx.map(Arc::new);
|
||||
// construct an active dataflow state with it
|
||||
let flow_plan = sql_to_flow_plan(&mut node_ctx, &self.query_engine, &sql).await?;
|
||||
debug!("Flow {:?}'s Plan is {:?}", flow_id, flow_plan);
|
||||
node_ctx.assign_table_schema(&sink_table_name, flow_plan.typ.clone())?;
|
||||
|
||||
let expire_when = expire_when
|
||||
.and_then(|s| {
|
||||
if s.is_empty() || s.split_whitespace().join("").is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s)
|
||||
}
|
||||
})
|
||||
.map(|d| {
|
||||
let d = d.as_ref();
|
||||
parse_fixed(d)
|
||||
.map(|(_, n)| n)
|
||||
.map_err(|err| err.to_string())
|
||||
})
|
||||
.transpose()
|
||||
.map_err(|err| UnexpectedSnafu { reason: err }.build())?;
|
||||
let _ = comment;
|
||||
let _ = flow_options;
|
||||
|
||||
// TODO(discord9): add more than one handles
|
||||
let sink_id = node_ctx.table_repr.get_by_name(&sink_table_name).unwrap().1;
|
||||
let sink_sender = node_ctx.get_sink_by_global_id(&sink_id)?;
|
||||
|
||||
let source_ids = source_table_ids
|
||||
.iter()
|
||||
.map(|id| node_ctx.table_repr.get_by_table_id(id).unwrap().1)
|
||||
.collect_vec();
|
||||
let source_receivers = source_ids
|
||||
.iter()
|
||||
.map(|id| {
|
||||
node_ctx
|
||||
.get_source_by_global_id(id)
|
||||
.map(|s| s.get_receiver())
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let err_collector = ErrCollector::default();
|
||||
self.flow_err_collectors
|
||||
.write()
|
||||
.await
|
||||
.insert(flow_id, err_collector.clone());
|
||||
let handle = &self.worker_handles[0].lock().await;
|
||||
let create_request = worker::Request::Create {
|
||||
flow_id,
|
||||
plan: flow_plan,
|
||||
sink_id,
|
||||
sink_sender,
|
||||
source_ids,
|
||||
src_recvs: source_receivers,
|
||||
expire_when,
|
||||
create_if_not_exist,
|
||||
err_collector,
|
||||
};
|
||||
handle.create_flow(create_request).await?;
|
||||
info!("Successfully create flow with id={}", flow_id);
|
||||
Ok(Some(flow_id))
|
||||
}
|
||||
}
|
||||
|
||||
/// FlowTickManager is a manager for flow ticks, which tracks flow execution progress
///
/// TODO(discord9): find a better way to do this, and avoid exposing the flow tick to other flows to
/// prevent TSO coordination mess
#[derive(Clone)]
pub struct FlowTickManager {
    start: Instant,
}

impl std::fmt::Debug for FlowTickManager {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FlowTickManager").finish()
    }
}

impl FlowTickManager {
    pub fn new() -> Self {
        FlowTickManager {
            start: Instant::now(),
        }
    }

    /// Return the current timestamp in milliseconds
    ///
    /// TODO(discord9): reconsider, since `tick()` requires a monotonic clock and also needs to survive recovery later
    pub fn tick(&self) -> repr::Timestamp {
        let current = Instant::now();
        let since_the_epoch = current - self.start;
        since_the_epoch.as_millis() as repr::Timestamp
    }
}
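A standalone sketch of the tick semantics above: ticks are milliseconds elapsed since the manager was created, so they are monotonic but process-relative (they do not survive a restart), which is what the TODO is about.

use std::time::Instant;

struct TickManager {
    start: Instant,
}

impl TickManager {
    fn new() -> Self {
        Self { start: Instant::now() }
    }

    /// Milliseconds since `start`; `Instant` guarantees this never goes backwards.
    fn tick(&self) -> i64 {
        self.start.elapsed().as_millis() as i64
    }
}

fn main() {
    let t = TickManager::new();
    let a = t.tick();
    std::thread::sleep(std::time::Duration::from_millis(5));
    let b = t.tick();
    assert!(b >= a);
}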
|
||||
|
||||
117
src/flow/src/adapter/flownode_impl.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! impl the `Flownode` trait for `FlownodeManager` so standalone mode can call it
|
||||
|
||||
use api::v1::flow::{flow_request, CreateRequest, DropRequest, FlowRequest, FlowResponse};
|
||||
use api::v1::region::InsertRequests;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::error::{ExternalSnafu, Result, UnexpectedSnafu};
|
||||
use common_meta::node_manager::Flownode;
|
||||
use itertools::Itertools;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::adapter::FlownodeManager;
|
||||
use crate::repr::{self, DiffRow};
|
||||
|
||||
fn to_meta_err(err: crate::adapter::error::Error) -> common_meta::error::Error {
|
||||
// TODO(discord9): refactor this
|
||||
Err::<(), _>(BoxedError::new(err))
|
||||
.with_context(|_| ExternalSnafu)
|
||||
.unwrap_err()
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Flownode for FlownodeManager {
|
||||
async fn handle(&self, request: FlowRequest) -> Result<FlowResponse> {
|
||||
let query_ctx = request
|
||||
.header
|
||||
.and_then(|h| h.query_context)
|
||||
.map(|ctx| ctx.into());
|
||||
match request.body {
|
||||
Some(flow_request::Body::Create(CreateRequest {
|
||||
flow_id: Some(task_id),
|
||||
source_table_ids,
|
||||
sink_table_name: Some(sink_table_name),
|
||||
create_if_not_exists,
|
||||
expire_when,
|
||||
comment,
|
||||
sql,
|
||||
flow_options,
|
||||
})) => {
|
||||
let source_table_ids = source_table_ids.into_iter().map(|id| id.id).collect_vec();
|
||||
let sink_table_name = [
|
||||
sink_table_name.catalog_name,
|
||||
sink_table_name.schema_name,
|
||||
sink_table_name.table_name,
|
||||
];
|
||||
let ret = self
|
||||
.create_flow(
|
||||
task_id.id as u64,
|
||||
sink_table_name,
|
||||
&source_table_ids,
|
||||
create_if_not_exists,
|
||||
Some(expire_when),
|
||||
Some(comment),
|
||||
sql,
|
||||
flow_options,
|
||||
query_ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(to_meta_err)?;
|
||||
Ok(FlowResponse {
|
||||
affected_flows: ret
|
||||
.map(|id| greptime_proto::v1::FlowId { id: id as u32 })
|
||||
.into_iter()
|
||||
.collect_vec(),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
Some(flow_request::Body::Drop(DropRequest {
|
||||
flow_id: Some(flow_id),
|
||||
})) => {
|
||||
self.remove_flow(flow_id.id as u64)
|
||||
.await
|
||||
.map_err(to_meta_err)?;
|
||||
Ok(Default::default())
|
||||
}
|
||||
None => UnexpectedSnafu {
|
||||
err_msg: "Missing request body",
|
||||
}
|
||||
.fail(),
|
||||
_ => UnexpectedSnafu {
|
||||
err_msg: "Invalid request body.",
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
|
||||
for write_request in request.requests {
|
||||
let region_id = write_request.region_id;
|
||||
let rows_proto = write_request.rows.map(|r| r.rows).unwrap_or(vec![]);
|
||||
// TODO(discord9): reconsider time assignment mechanism
|
||||
let now = self.tick_manager.tick();
|
||||
let rows: Vec<DiffRow> = rows_proto
|
||||
.into_iter()
|
||||
.map(repr::Row::from)
|
||||
.map(|r| (r, now, 1))
|
||||
.collect_vec();
|
||||
self.handle_write_request(region_id.into(), rows)
|
||||
.await
|
||||
.map_err(to_meta_err)?;
|
||||
}
|
||||
Ok(Default::default())
|
||||
}
|
||||
}
|
||||
@@ -30,7 +30,7 @@ use crate::expr::GlobalId;
|
||||
use crate::repr::{DiffRow, RelationType, BROADCAST_CAP};
|
||||
|
||||
/// A context that holds the information of the dataflow
|
||||
#[derive(Default)]
|
||||
#[derive(Default, Debug)]
|
||||
pub struct FlownodeContext {
|
||||
/// mapping from source table to tasks, useful for schedule which task to run when a source table is updated
|
||||
pub source_to_tasks: BTreeMap<TableId, BTreeSet<FlowId>>,
|
||||
@@ -64,6 +64,7 @@ pub struct FlownodeContext {
|
||||
///
|
||||
/// receiver still use tokio broadcast channel, since only sender side need to know
|
||||
/// backpressure and adjust dataflow running duration to avoid blocking
|
||||
#[derive(Debug)]
|
||||
pub struct SourceSender {
|
||||
sender: broadcast::Sender<DiffRow>,
|
||||
send_buf: VecDeque<DiffRow>,
|
||||
|
||||
245
src/flow/src/adapter/parse_expr.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! parse expr like "ts <= now() - interval '5 m'"
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, tag_no_case};
|
||||
use nom::character::complete::{alphanumeric1, digit0, multispace0};
|
||||
use nom::combinator::peek;
|
||||
use nom::sequence::tuple;
|
||||
use nom::IResult;
|
||||
|
||||
use crate::repr;
|
||||
|
||||
#[test]
|
||||
fn test_parse_duration() {
|
||||
let input = "1 h 5 m 42 second";
|
||||
let (remain, ttl) = parse_duration(input).unwrap();
|
||||
assert_eq!(remain, "");
|
||||
assert_eq!(ttl, (3600 + 5 * 60 + 42) * 1000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_fixed() {
|
||||
let input = "timestamp < now() - INTERVAL '5m 42s'";
|
||||
let (remain, ttl) = parse_fixed(input).unwrap();
|
||||
assert_eq!(remain, "");
|
||||
assert_eq!(ttl, (5 * 60 + 42) * 1000);
|
||||
}
|
||||
|
||||
pub fn parse_fixed(input: &str) -> IResult<&str, i64> {
|
||||
let (r, _) = tuple((
|
||||
multispace0,
|
||||
tag_no_case("timestamp"),
|
||||
multispace0,
|
||||
tag("<"),
|
||||
multispace0,
|
||||
tag_no_case("now()"),
|
||||
multispace0,
|
||||
tag("-"),
|
||||
multispace0,
|
||||
tag_no_case("interval"),
|
||||
multispace0,
|
||||
))(input)?;
|
||||
tuple((tag("'"), parse_duration, tag("'")))(r).map(|(r, (_, ttl, _))| (r, ttl))
|
||||
}
|
||||
|
||||
/// Parse a duration and return the TTL in milliseconds; currently only the time part of the psql interval type is supported
|
||||
pub fn parse_duration(input: &str) -> IResult<&str, i64> {
|
||||
let mut intervals = vec![];
|
||||
let mut remain = input;
|
||||
while peek(parse_quality)(remain).is_ok() {
|
||||
let (r, number) = parse_quality(remain)?;
|
||||
let (r, unit) = parse_time_unit(r)?;
|
||||
intervals.push((number, unit));
|
||||
remain = r;
|
||||
}
|
||||
let mut total = 0;
|
||||
for (number, unit) in intervals {
|
||||
let number = match unit {
|
||||
TimeUnit::Second => number,
|
||||
TimeUnit::Minute => number * 60,
|
||||
TimeUnit::Hour => number * 60 * 60,
|
||||
};
|
||||
total += number;
|
||||
}
|
||||
total *= 1000;
|
||||
Ok((remain, total))
|
||||
}
|
||||
|
||||
enum Expr {
|
||||
Col(String),
|
||||
Now,
|
||||
Duration(repr::Duration),
|
||||
Binary {
|
||||
left: Box<Expr>,
|
||||
op: String,
|
||||
right: Box<Expr>,
|
||||
},
|
||||
}
|
||||
|
||||
fn parse_expr(input: &str) -> IResult<&str, Expr> {
|
||||
parse_expr_bp(input, 0)
|
||||
}
|
||||
|
||||
/// a simple pratt parser
|
||||
fn parse_expr_bp(input: &str, min_bp: u8) -> IResult<&str, Expr> {
|
||||
let (mut input, mut lhs): (&str, Expr) = parse_item(input)?;
|
||||
loop {
|
||||
let (r, op) = parse_op(input)?;
|
||||
let (_, (l_bp, r_bp)) = infix_binding_power(op)?;
|
||||
if l_bp < min_bp {
|
||||
return Ok((input, lhs));
|
||||
}
|
||||
let (r, rhs) = parse_expr_bp(r, r_bp)?;
|
||||
input = r;
|
||||
lhs = Expr::Binary {
|
||||
left: Box::new(lhs),
|
||||
op: op.to_string(),
|
||||
right: Box::new(rhs),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_op(input: &str) -> IResult<&str, &str> {
|
||||
alt((parse_add_sub, parse_cmp))(input)
|
||||
}
|
||||
|
||||
fn parse_item(input: &str) -> IResult<&str, Expr> {
|
||||
if let Ok((r, name)) = parse_col_name(input) {
|
||||
Ok((r, Expr::Col(name.to_string())))
|
||||
} else if let Ok((r, _now)) = parse_now(input) {
|
||||
Ok((r, Expr::Now))
|
||||
} else if let Ok((_r, _num)) = parse_quality(input) {
|
||||
todo!()
|
||||
} else {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
fn infix_binding_power(op: &str) -> IResult<&str, (u8, u8)> {
|
||||
let ret = match op {
|
||||
"<" | ">" | "<=" | ">=" => (1, 2),
|
||||
"+" | "-" => (3, 4),
|
||||
_ => {
|
||||
return Err(nom::Err::Error(nom::error::Error::new(
|
||||
op,
|
||||
nom::error::ErrorKind::Fail,
|
||||
)))
|
||||
}
|
||||
};
|
||||
Ok((op, ret))
|
||||
}
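The binding powers above make arithmetic bind tighter than comparison, so `ts < now() - 5` groups as `ts < (now() - 5)` in the pratt loop. A tiny standalone check of that ordering (not part of the diff):

fn binding_power(op: &str) -> (u8, u8) {
    match op {
        "<" | ">" | "<=" | ">=" => (1, 2),
        "+" | "-" => (3, 4),
        _ => panic!("unknown operator"),
    }
}

fn main() {
    // "-" has a higher left binding power than "<"'s right binding power,
    // so it captures the right-hand operand first.
    assert!(binding_power("-").0 > binding_power("<").1);
}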
|
||||
|
||||
fn parse_col_name(input: &str) -> IResult<&str, &str> {
|
||||
tuple((multispace0, alphanumeric1, multispace0))(input).map(|(r, (_, name, _))| (r, name))
|
||||
}
|
||||
|
||||
fn parse_now(input: &str) -> IResult<&str, &str> {
|
||||
tag_no_case("now()")(input)
|
||||
}
|
||||
|
||||
fn parse_add_sub(input: &str) -> IResult<&str, &str> {
|
||||
tuple((multispace0, alt((tag("+"), tag("-"))), multispace0))(input)
|
||||
.map(|(r, (_, op, _))| (r, op))
|
||||
}
|
||||
|
||||
fn parse_cmp(input: &str) -> IResult<&str, &str> {
|
||||
tuple((
|
||||
multispace0,
|
||||
alt((tag("<="), tag(">="), tag("<"), tag(">"))),
|
||||
multispace0,
|
||||
))(input)
|
||||
.map(|(r, (_, op, _))| (r, op))
|
||||
}
|
||||
|
||||
/// parse a number with optional sign
|
||||
fn parse_quality(input: &str) -> IResult<&str, repr::Duration> {
|
||||
tuple((
|
||||
multispace0,
|
||||
alt((tag("+"), tag("-"), tag(""))),
|
||||
digit0,
|
||||
multispace0,
|
||||
))(input)
|
||||
.map(|(r, (_, sign, name, _))| (r, sign, name))
|
||||
.and_then(|(r, sign, name)| {
|
||||
let num = name.parse::<repr::Duration>().map_err(|_| {
|
||||
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
|
||||
})?;
|
||||
let num = match sign {
|
||||
"+" => num,
|
||||
"-" => -num,
|
||||
_ => num,
|
||||
};
|
||||
Ok((r, num))
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum TimeUnit {
|
||||
Second,
|
||||
Minute,
|
||||
Hour,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum DateUnit {
|
||||
Day,
|
||||
Month,
|
||||
Year,
|
||||
}
|
||||
|
||||
fn parse_time_unit(input: &str) -> IResult<&str, TimeUnit> {
|
||||
fn to_second(input: &str) -> IResult<&str, TimeUnit> {
|
||||
alt((
|
||||
tag_no_case("second"),
|
||||
tag_no_case("seconds"),
|
||||
tag_no_case("S"),
|
||||
))(input)
|
||||
.map(move |(r, _)| (r, TimeUnit::Second))
|
||||
}
|
||||
fn to_minute(input: &str) -> IResult<&str, TimeUnit> {
|
||||
alt((
|
||||
tag_no_case("minute"),
|
||||
tag_no_case("minutes"),
|
||||
tag_no_case("m"),
|
||||
))(input)
|
||||
.map(move |(r, _)| (r, TimeUnit::Minute))
|
||||
}
|
||||
fn to_hour(input: &str) -> IResult<&str, TimeUnit> {
|
||||
alt((tag_no_case("hour"), tag_no_case("hours"), tag_no_case("h")))(input)
|
||||
.map(move |(r, _)| (r, TimeUnit::Hour))
|
||||
}
|
||||
|
||||
tuple((
|
||||
multispace0,
|
||||
alt((
|
||||
to_second, to_minute,
|
||||
to_hour, /*
|
||||
tag_no_case("day"),
|
||||
tag_no_case("days"),
|
||||
tag_no_case("d"),
|
||||
tag_no_case("month"),
|
||||
tag_no_case("months"),
|
||||
tag_no_case("m"),
|
||||
tag_no_case("year"),
|
||||
tag_no_case("years"),
|
||||
tag_no_case("y"),
|
||||
*/
|
||||
)),
|
||||
multispace0,
|
||||
))(input)
|
||||
.map(|(r, (_, unit, _))| (r, unit))
|
||||
}
|
||||
147
src/flow/src/adapter/server.rs
Normal file
@@ -0,0 +1,147 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Implementation of grpc service for flow node
|
||||
|
||||
use std::net::SocketAddr;
|
||||
|
||||
use common_meta::node_manager::Flownode;
|
||||
use common_telemetry::tracing::info;
|
||||
use futures::FutureExt;
|
||||
use greptime_proto::v1::flow::{flow_server, FlowRequest, FlowResponse, InsertRequests};
|
||||
use itertools::Itertools;
|
||||
use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::sync::{oneshot, Mutex};
|
||||
use tonic::transport::server::TcpIncoming;
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
use crate::adapter::FlownodeManagerRef;
|
||||
pub const FLOW_NODE_SERVER_NAME: &str = "FLOW_NODE_SERVER";
|
||||
|
||||
/// Wraps the flow node manager to avoid the orphan rule with `Arc<...>`
|
||||
#[derive(Clone)]
|
||||
pub struct FlowService {
|
||||
pub manager: FlownodeManagerRef,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl flow_server::Flow for FlowService {
|
||||
async fn handle_create_remove(
|
||||
&self,
|
||||
request: Request<FlowRequest>,
|
||||
) -> Result<Response<FlowResponse>, Status> {
|
||||
let request = request.into_inner();
|
||||
self.manager
|
||||
.handle(request)
|
||||
.await
|
||||
.map(Response::new)
|
||||
.map_err(|e| {
|
||||
let msg = format!("failed to handle request: {:?}", e);
|
||||
Status::internal(msg)
|
||||
})
|
||||
}
|
||||
|
||||
async fn handle_mirror_request(
|
||||
&self,
|
||||
request: Request<InsertRequests>,
|
||||
) -> Result<Response<FlowResponse>, Status> {
|
||||
let request = request.into_inner();
|
||||
// TODO(discord9): fix protobuf import order shenanigans to remove this duplicated define
|
||||
let request = api::v1::region::InsertRequests {
|
||||
requests: request
|
||||
.requests
|
||||
.into_iter()
|
||||
.map(|insert| api::v1::region::InsertRequest {
|
||||
region_id: insert.region_id,
|
||||
rows: insert.rows,
|
||||
})
|
||||
.collect_vec(),
|
||||
};
|
||||
self.manager
|
||||
.handle_inserts(request)
|
||||
.await
|
||||
.map(Response::new)
|
||||
.map_err(|e| {
|
||||
let msg = format!("failed to handle request: {:?}", e);
|
||||
Status::internal(msg)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FlownodeServer {
|
||||
pub shutdown_tx: Mutex<Option<oneshot::Sender<()>>>,
|
||||
pub flow_service: FlowService,
|
||||
}
|
||||
|
||||
impl FlownodeServer {
|
||||
pub fn create_flow_service(&self) -> flow_server::FlowServer<impl flow_server::Flow> {
|
||||
flow_server::FlowServer::new(self.flow_service.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl servers::server::Server for FlownodeServer {
|
||||
async fn shutdown(&self) -> Result<(), servers::error::Error> {
|
||||
let mut shutdown_tx = self.shutdown_tx.lock().await;
|
||||
if let Some(tx) = shutdown_tx.take() {
|
||||
if tx.send(()).is_err() {
|
||||
info!("Receiver dropped, the flow node server has already shutdown");
|
||||
}
|
||||
}
|
||||
info!("Shutdown flow node server");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
async fn start(&self, addr: SocketAddr) -> Result<SocketAddr, servers::error::Error> {
|
||||
let (tx, rx) = oneshot::channel::<()>();
|
||||
let (incoming, addr) = {
|
||||
let mut shutdown_tx = self.shutdown_tx.lock().await;
|
||||
ensure!(
|
||||
shutdown_tx.is_none(),
|
||||
AlreadyStartedSnafu { server: "flow" }
|
||||
);
|
||||
let listener = TcpListener::bind(addr)
|
||||
.await
|
||||
.context(TcpBindSnafu { addr })?;
|
||||
let addr = listener.local_addr().context(TcpBindSnafu { addr })?;
|
||||
let incoming =
|
||||
TcpIncoming::from_listener(listener, true, None).context(TcpIncomingSnafu)?;
|
||||
info!("flow server is bound to {}", addr);
|
||||
|
||||
*shutdown_tx = Some(tx);
|
||||
|
||||
(incoming, addr)
|
||||
};
|
||||
|
||||
let builder = tonic::transport::Server::builder().add_service(self.create_flow_service());
|
||||
let _handle = common_runtime::spawn_bg(async move {
|
||||
let _result = builder
|
||||
.serve_with_incoming_shutdown(incoming, rx.map(drop))
|
||||
.await
|
||||
.context(StartGrpcSnafu);
|
||||
});
|
||||
|
||||
// TODO(discord9): better place for dataflow to run per second
|
||||
let manager_ref = self.flow_service.manager.clone();
|
||||
let _handle = manager_ref.clone().run_background();
|
||||
|
||||
Ok(addr)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
FLOW_NODE_SERVER_NAME
|
||||
}
|
||||
}
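A minimal, self-contained sketch (assuming only `tokio` and `futures`; not part of the diff) of the oneshot-based graceful shutdown wired up in `start`/`shutdown` above: the serving task finishes when the receiver resolves, and a failed send means the server is already gone.

use futures::FutureExt;
use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    let (tx, rx) = oneshot::channel::<()>();

    let server = tokio::spawn(async move {
        // Stand-in for `serve_with_incoming_shutdown(incoming, rx.map(drop))`:
        // the future completes once the shutdown signal arrives (or tx is dropped).
        rx.map(drop).await;
        println!("flow server stopped");
    });

    // Shutdown path: ignore the error if the receiver was already dropped.
    if tx.send(()).is_err() {
        println!("receiver dropped, server had already shut down");
    }
    server.await.unwrap();
}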
|
||||
64
src/flow/src/adapter/tests.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Mock test for adapter module
|
||||
//! TODO(discord9): write mock test
|
||||
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder};
|
||||
use store_api::storage::ConcreteDataType;
|
||||
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder};
|
||||
|
||||
use super::*;
|
||||
|
||||
pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
table_id: TableId,
|
||||
table_name: &str,
|
||||
region_numbers: I,
|
||||
) -> TableInfo {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
];
|
||||
let schema = SchemaBuilder::try_from(column_schemas)
|
||||
.unwrap()
|
||||
.version(123)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let meta = TableMetaBuilder::default()
|
||||
.schema(Arc::new(schema))
|
||||
.primary_key_indices(vec![0])
|
||||
.engine("engine")
|
||||
.next_column_id(3)
|
||||
.region_numbers(region_numbers.into_iter().collect::<Vec<_>>())
|
||||
.build()
|
||||
.unwrap();
|
||||
TableInfoBuilder::default()
|
||||
.table_id(table_id)
|
||||
.table_version(5)
|
||||
.name(table_name)
|
||||
.meta(meta)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Create a mock harness for flow node manager
|
||||
///
|
||||
/// containing several default table info and schema
|
||||
fn mock_harness_flow_node_manager() {}
|
||||
@@ -15,15 +15,16 @@
|
||||
//! For single-thread flow worker
|
||||
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::info;
|
||||
use enum_as_inner::EnumAsInner;
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use tokio::sync::{broadcast, mpsc, Mutex};
|
||||
|
||||
use crate::adapter::error::{Error, FlowAlreadyExistSnafu, InternalSnafu};
|
||||
use crate::adapter::error::{Error, FlowAlreadyExistSnafu, InternalSnafu, UnexpectedSnafu};
|
||||
use crate::adapter::FlowId;
|
||||
use crate::compute::{Context, DataflowState, ErrCollector};
|
||||
use crate::expr::GlobalId;
|
||||
@@ -39,6 +40,7 @@ pub fn create_worker<'a>() -> (WorkerHandle, Worker<'a>) {
|
||||
let (itc_client, itc_server) = create_inter_thread_call();
|
||||
let worker_handle = WorkerHandle {
|
||||
itc_client: Mutex::new(itc_client),
|
||||
shutdown: AtomicBool::new(false),
|
||||
};
|
||||
let worker = Worker {
|
||||
task_states: BTreeMap::new(),
|
||||
@@ -105,6 +107,7 @@ impl<'subgraph> ActiveDataflowState<'subgraph> {
|
||||
#[derive(Debug)]
|
||||
pub struct WorkerHandle {
|
||||
itc_client: Mutex<InterThreadCallClient>,
|
||||
shutdown: AtomicBool,
|
||||
}
|
||||
|
||||
impl WorkerHandle {
|
||||
@@ -123,7 +126,7 @@ impl WorkerHandle {
|
||||
.itc_client
|
||||
.lock()
|
||||
.await
|
||||
.call_blocking(create_reqs)
|
||||
.call_with_resp(create_reqs)
|
||||
.await?;
|
||||
ret.into_create().map_err(|ret| {
|
||||
InternalSnafu {
|
||||
@@ -138,7 +141,7 @@ impl WorkerHandle {
|
||||
/// remove task, return task id
|
||||
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<bool, Error> {
|
||||
let req = Request::Remove { flow_id };
|
||||
let ret = self.itc_client.lock().await.call_blocking(req).await?;
|
||||
let ret = self.itc_client.lock().await.call_with_resp(req).await?;
|
||||
|
||||
ret.into_remove().map_err(|ret| {
|
||||
InternalSnafu {
|
||||
@@ -157,13 +160,12 @@ impl WorkerHandle {
|
||||
self.itc_client
|
||||
.lock()
|
||||
.await
|
||||
.call_non_blocking(Request::RunAvail { now })
|
||||
.await
|
||||
.call_no_resp(Request::RunAvail { now })
|
||||
}
|
||||
|
||||
pub async fn contains_flow(&self, flow_id: FlowId) -> Result<bool, Error> {
|
||||
let req = Request::ContainTask { flow_id };
|
||||
let ret = self.itc_client.lock().await.call_blocking(req).await?;
|
||||
let ret = self.itc_client.lock().await.call_with_resp(req).await?;
|
||||
|
||||
ret.into_contain_task().map_err(|ret| {
|
||||
InternalSnafu {
|
||||
@@ -177,11 +179,37 @@ impl WorkerHandle {
|
||||
|
||||
/// shutdown the worker
|
||||
pub async fn shutdown(&self) -> Result<(), Error> {
|
||||
self.itc_client
|
||||
.lock()
|
||||
.await
|
||||
.call_non_blocking(Request::Shutdown)
|
||||
.await
|
||||
if !self.shutdown.fetch_or(true, Ordering::SeqCst) {
|
||||
self.itc_client.lock().await.call_no_resp(Request::Shutdown)
|
||||
} else {
|
||||
UnexpectedSnafu {
|
||||
reason: "Worker already shutdown",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
/// shutdown the worker
|
||||
pub fn shutdown_blocking(&self) -> Result<(), Error> {
|
||||
if !self.shutdown.fetch_or(true, Ordering::SeqCst) {
|
||||
self.itc_client
|
||||
.blocking_lock()
|
||||
.call_no_resp(Request::Shutdown)
|
||||
} else {
|
||||
UnexpectedSnafu {
|
||||
reason: "Worker already shutdown",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
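The `fetch_or(true, ...)` guard above makes shutdown idempotent: only the first caller observes `false` and actually sends the request; every later caller lands in the "already shutdown" branch. A standalone sketch of that guard (simplified types, not part of the diff):

use std::sync::atomic::{AtomicBool, Ordering};

struct Handle {
    shutdown: AtomicBool,
}

impl Handle {
    fn shutdown(&self) -> Result<(), &'static str> {
        // fetch_or returns the previous value: false means we won the race.
        if !self.shutdown.fetch_or(true, Ordering::SeqCst) {
            Ok(()) // first call: actually deliver the shutdown request here
        } else {
            Err("Worker already shutdown")
        }
    }
}

fn main() {
    let h = Handle { shutdown: AtomicBool::new(false) };
    assert!(h.shutdown().is_ok());
    assert!(h.shutdown().is_err());
}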
|
||||
|
||||
impl Drop for WorkerHandle {
|
||||
fn drop(&mut self) {
|
||||
if let Err(err) = self.shutdown_blocking() {
|
||||
common_telemetry::error!("Fail to shutdown worker: {:?}", err)
|
||||
}
|
||||
info!("Flow Worker shutdown due to Worker Handle dropped.")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,7 +236,6 @@ impl<'s> Worker<'s> {
|
||||
create_if_not_exist: bool,
|
||||
err_collector: ErrCollector,
|
||||
) -> Result<Option<FlowId>, Error> {
|
||||
let _ = expire_when;
|
||||
let already_exist = self.task_states.contains_key(&flow_id);
|
||||
match (already_exist, create_if_not_exist) {
|
||||
(true, true) => return Ok(None),
|
||||
@@ -220,6 +247,7 @@ impl<'s> Worker<'s> {
|
||||
err_collector,
|
||||
..Default::default()
|
||||
};
|
||||
cur_task_state.state.set_expire_after(expire_when);
|
||||
|
||||
{
|
||||
let mut ctx = cur_task_state.new_ctx(sink_id);
|
||||
@@ -395,7 +423,7 @@ struct InterThreadCallClient {
|
||||
|
||||
impl InterThreadCallClient {
|
||||
/// call without expecting responses or blocking
|
||||
async fn call_non_blocking(&self, req: Request) -> Result<(), Error> {
|
||||
fn call_no_resp(&self, req: Request) -> Result<(), Error> {
|
||||
// TODO(discord9): relax memory order later
|
||||
let call_id = self.call_id.fetch_add(1, Ordering::SeqCst);
|
||||
self.arg_sender
|
||||
@@ -404,7 +432,7 @@ impl InterThreadCallClient {
|
||||
}
|
||||
|
||||
/// call blocking, and return the result
|
||||
async fn call_blocking(&mut self, req: Request) -> Result<Response, Error> {
|
||||
async fn call_with_resp(&mut self, req: Request) -> Result<Response, Error> {
|
||||
// TODO(discord9): relax memory order later
|
||||
let call_id = self.call_id.fetch_add(1, Ordering::SeqCst);
|
||||
self.arg_sender
|
||||
|
||||
@@ -111,7 +111,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
|
||||
input,
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
} => self.render_reduce(input, key_val_plan, reduce_plan),
|
||||
} => self.render_reduce(input, key_val_plan, reduce_plan, plan.typ),
|
||||
Plan::Join { .. } => NotImplementedSnafu {
|
||||
reason: "Join is still WIP",
|
||||
}
|
||||
@@ -223,11 +223,11 @@ mod test {
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
use hydroflow::scheduled::graph_ext::GraphExt;
|
||||
use hydroflow::scheduled::handoff::VecHandoff;
|
||||
use pretty_assertions::{assert_eq, assert_ne};
|
||||
|
||||
use super::*;
|
||||
use crate::expr::BinaryFunc;
|
||||
use crate::repr::Row;
|
||||
|
||||
pub fn run_and_check(
|
||||
state: &mut DataflowState,
|
||||
df: &mut Hydroflow,
|
||||
@@ -238,6 +238,12 @@ mod test {
|
||||
for now in time_range {
|
||||
state.set_current_ts(now);
|
||||
state.run_available_with_schedule(df);
|
||||
if !state.get_err_collector().is_empty() {
|
||||
panic!(
|
||||
"Errors occur: {:?}",
|
||||
state.get_err_collector().get_all_blocking()
|
||||
)
|
||||
}
|
||||
assert!(state.get_err_collector().is_empty());
|
||||
if let Some(expected) = expected.get(&now) {
|
||||
assert_eq!(*output.borrow(), *expected, "at ts={}", now);
|
||||
|
||||
@@ -29,8 +29,8 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector
|
||||
use crate::expr::error::{DataTypeSnafu, InternalSnafu};
|
||||
use crate::expr::{AggregateExpr, EvalError, ScalarExpr};
|
||||
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan};
|
||||
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
|
||||
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter};
|
||||
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
|
||||
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, KeyExpiryManager};
|
||||
|
||||
impl<'referred, 'df> Context<'referred, 'df> {
|
||||
const REDUCE: &'static str = "reduce";
|
||||
@@ -42,6 +42,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
|
||||
input: Box<TypedPlan>,
|
||||
key_val_plan: KeyValPlan,
|
||||
reduce_plan: ReducePlan,
|
||||
output_type: RelationType,
|
||||
) -> Result<CollectionBundle, Error> {
|
||||
let input = self.render_plan(*input)?;
|
||||
// first assemble key & val, i.e. ((Row, Row), tick, diff)
|
||||
@@ -52,6 +53,15 @@ impl<'referred, 'df> Context<'referred, 'df> {
|
||||
|
||||
// TODO(discord9): config global expire time from self
|
||||
let arrange_handler = self.compute_state.new_arrange(None);
|
||||
|
||||
if let (Some(time_index), Some(expire_after)) =
|
||||
(output_type.time_index, self.compute_state.expire_after())
|
||||
{
|
||||
let expire_man =
|
||||
KeyExpiryManager::new(Some(expire_after), Some(ScalarExpr::Column(time_index)));
|
||||
arrange_handler.write().set_expire_state(expire_man);
|
||||
}
|
||||
|
||||
// reduce needs a full arrangement to be able to query all keys
|
||||
let arrange_handler_inner = arrange_handler.clone_full_arrange().context(PlanSnafu {
|
||||
reason: "No write is expected at this point",
|
||||
@@ -729,15 +739,273 @@ mod test {
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use common_time::{DateTime, Interval, Timestamp};
|
||||
use datatypes::data_type::{ConcreteDataType, ConcreteDataType as CDT};
|
||||
use hydroflow::scheduled::graph::Hydroflow;
|
||||
|
||||
use super::*;
|
||||
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
|
||||
use crate::compute::state::DataflowState;
|
||||
use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject};
|
||||
use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc};
|
||||
use crate::repr::{ColumnType, RelationType};
|
||||
|
||||
/// SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00')
|
||||
/// input table columns: number, ts
|
||||
/// expected: sum(number), window_start, window_end
|
||||
#[test]
|
||||
fn test_tumble_group_by() {
|
||||
let mut df = Hydroflow::new();
|
||||
let mut state = DataflowState::default();
|
||||
let mut ctx = harness_test_ctx(&mut df, &mut state);
|
||||
const START: i64 = 1625097600000;
|
||||
let rows = vec![
|
||||
(1u32, START + 1000),
|
||||
(2u32, START + 1500),
|
||||
(3u32, START + 2000),
|
||||
(1u32, START + 2500),
|
||||
(2u32, START + 3000),
|
||||
(3u32, START + 3500),
|
||||
];
|
||||
let rows = rows
|
||||
.into_iter()
|
||||
.map(|(number, ts)| {
|
||||
(
|
||||
Row::new(vec![number.into(), Timestamp::new_millisecond(ts).into()]),
|
||||
1,
|
||||
1,
|
||||
)
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
let collection = ctx.render_constant(rows.clone());
|
||||
ctx.insert_global(GlobalId::User(1), collection);
|
||||
|
||||
let aggr_expr = AggregateExpr {
|
||||
func: AggregateFunc::SumUInt32,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![
|
||||
ColumnType::new(CDT::uint64_datatype(), true), // sum(number)
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window start
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window end
|
||||
]),
|
||||
// TODO(discord9): mfp indirectly ref to key columns
|
||||
/*
|
||||
.with_key(vec![1])
|
||||
.with_time_index(Some(0)),*/
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
input: Box::new(
|
||||
Plan::Get {
|
||||
id: crate::expr::Id::Global(GlobalId::User(1)),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnType::new(ConcreteDataType::datetime_datatype(), false),
|
||||
])),
|
||||
),
|
||||
key_val_plan: KeyValPlan {
|
||||
key_plan: MapFilterProject::new(2)
|
||||
.map(vec![
|
||||
ScalarExpr::Column(1).call_unary(
|
||||
UnaryFunc::TumbleWindowFloor {
|
||||
window_size: Interval::from_month_day_nano(
|
||||
0,
|
||||
0,
|
||||
1_000_000_000,
|
||||
),
|
||||
start_time: Some(DateTime::new(1625097600000)),
|
||||
},
|
||||
),
|
||||
ScalarExpr::Column(1).call_unary(
|
||||
UnaryFunc::TumbleWindowCeiling {
|
||||
window_size: Interval::from_month_day_nano(
|
||||
0,
|
||||
0,
|
||||
1_000_000_000,
|
||||
),
|
||||
start_time: Some(DateTime::new(1625097600000)),
|
||||
},
|
||||
),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![2, 3])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
val_plan: MapFilterProject::new(2)
|
||||
.project(vec![0, 1])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
},
|
||||
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: vec![aggr_expr.clone()],
|
||||
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
}
|
||||
.with_types(
|
||||
RelationType::new(vec![
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window start
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window end
|
||||
ColumnType::new(CDT::uint64_datatype(), true), //sum(number)
|
||||
])
|
||||
.with_key(vec![1])
|
||||
.with_time_index(Some(0)),
|
||||
),
|
||||
),
|
||||
mfp: MapFilterProject::new(3)
|
||||
.map(vec![
|
||||
ScalarExpr::Column(2),
|
||||
ScalarExpr::Column(3),
|
||||
ScalarExpr::Column(0),
|
||||
ScalarExpr::Column(1),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![4, 5, 6])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
|
||||
let bundle = ctx.render_plan(expected).unwrap();
|
||||
|
||||
let output = get_output_handle(&mut ctx, bundle);
|
||||
drop(ctx);
|
||||
let expected = BTreeMap::from([(
|
||||
1,
|
||||
vec![
|
||||
(
|
||||
Row::new(vec![
|
||||
3u64.into(),
|
||||
Timestamp::new_millisecond(START + 1000).into(),
|
||||
Timestamp::new_millisecond(START + 2000).into(),
|
||||
]),
|
||||
1,
|
||||
1,
|
||||
),
|
||||
(
|
||||
Row::new(vec![
|
||||
4u64.into(),
|
||||
Timestamp::new_millisecond(START + 2000).into(),
|
||||
Timestamp::new_millisecond(START + 3000).into(),
|
||||
]),
|
||||
1,
|
||||
1,
|
||||
),
|
||||
(
|
||||
Row::new(vec![
|
||||
5u64.into(),
|
||||
Timestamp::new_millisecond(START + 3000).into(),
|
||||
Timestamp::new_millisecond(START + 4000).into(),
|
||||
]),
|
||||
1,
|
||||
1,
|
||||
),
|
||||
],
|
||||
)]);
|
||||
run_and_check(&mut state, &mut df, 1..2, expected, output);
|
||||
}
|
||||
|
||||
/// select avg(number) from number;
|
||||
#[test]
|
||||
fn test_avg_eval() {
|
||||
let mut df = Hydroflow::new();
|
||||
let mut state = DataflowState::default();
|
||||
let mut ctx = harness_test_ctx(&mut df, &mut state);
|
||||
|
||||
let rows = vec![
|
||||
(Row::new(vec![1u32.into()]), 1, 1),
|
||||
(Row::new(vec![2u32.into()]), 1, 1),
|
||||
(Row::new(vec![3u32.into()]), 1, 1),
|
||||
(Row::new(vec![1u32.into()]), 1, 1),
|
||||
(Row::new(vec![2u32.into()]), 1, 1),
|
||||
(Row::new(vec![3u32.into()]), 1, 1),
|
||||
];
|
||||
let collection = ctx.render_constant(rows.clone());
|
||||
ctx.insert_global(GlobalId::User(1), collection);
|
||||
|
||||
let aggr_exprs = vec![
|
||||
AggregateExpr {
|
||||
func: AggregateFunc::SumUInt32,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
},
|
||||
AggregateExpr {
|
||||
func: AggregateFunc::Count,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
},
|
||||
];
|
||||
let avg_expr = ScalarExpr::If {
|
||||
cond: Box::new(ScalarExpr::Column(1).call_binary(
|
||||
ScalarExpr::Literal(Value::from(0u32), CDT::int64_datatype()),
|
||||
BinaryFunc::NotEq,
|
||||
)),
|
||||
then: Box::new(ScalarExpr::Column(0).call_binary(
|
||||
ScalarExpr::Column(1).call_unary(UnaryFunc::Cast(CDT::uint64_datatype())),
|
||||
BinaryFunc::DivUInt64,
|
||||
)),
|
||||
els: Box::new(ScalarExpr::Literal(Value::Null, CDT::uint64_datatype())),
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
input: Box::new(
|
||||
Plan::Get {
|
||||
id: crate::expr::Id::Global(GlobalId::User(1)),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::int64_datatype(), false),
|
||||
])),
|
||||
),
|
||||
key_val_plan: KeyValPlan {
|
||||
key_plan: MapFilterProject::new(1)
|
||||
.project(vec![])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
val_plan: MapFilterProject::new(1)
|
||||
.project(vec![0])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
},
|
||||
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: aggr_exprs.clone(),
|
||||
simple_aggrs: vec![
|
||||
AggrWithIndex::new(aggr_exprs[0].clone(), 0, 0),
|
||||
AggrWithIndex::new(aggr_exprs[1].clone(), 0, 1),
|
||||
],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint32_datatype(), true),
|
||||
ColumnType::new(ConcreteDataType::int64_datatype(), true),
|
||||
])),
|
||||
),
|
||||
mfp: MapFilterProject::new(2)
|
||||
.map(vec![
|
||||
avg_expr,
|
||||
// TODO(discord9): optimize mfp so to remove indirect ref
|
||||
ScalarExpr::Column(2),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![3])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
|
||||
let bundle = ctx.render_plan(expected).unwrap();
|
||||
|
||||
let output = get_output_handle(&mut ctx, bundle);
|
||||
drop(ctx);
|
||||
let expected = BTreeMap::from([(1, vec![(Row::new(vec![2u64.into()]), 1, 1)])]);
|
||||
run_and_check(&mut state, &mut df, 1..2, expected, output);
|
||||
}
|
||||
|
||||
/// SELECT DISTINCT col FROM table
|
||||
///
|
||||
/// table schema:
|
||||
@@ -776,6 +1044,7 @@ mod test {
|
||||
Box::new(input_plan.with_types(typ)),
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
RelationType::empty(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -850,6 +1119,7 @@ mod test {
|
||||
Box::new(input_plan.with_types(typ)),
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
RelationType::empty(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -930,6 +1200,7 @@ mod test {
|
||||
Box::new(input_plan.with_types(typ)),
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
RelationType::empty(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1006,6 +1277,7 @@ mod test {
|
||||
Box::new(input_plan.with_types(typ)),
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
RelationType::empty(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1097,6 +1369,7 @@ mod test {
|
||||
Box::new(input_plan.with_types(typ)),
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
RelationType::empty(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
|
||||
&mut self,
|
||||
mut src_recv: broadcast::Receiver<DiffRow>,
|
||||
) -> Result<CollectionBundle, Error> {
|
||||
debug!("Rendering Source");
|
||||
let (send_port, recv_port) = self.df.make_edge::<_, Toff>("source");
|
||||
let arrange_handler = self.compute_state.new_arrange(None);
|
||||
let arrange_handler_inner =
|
||||
@@ -60,7 +61,6 @@ impl<'referred, 'df> Context<'referred, 'df> {
|
||||
let prev_avail = arr.into_iter().map(|((k, _), t, d)| (k, t, d));
|
||||
let mut to_send = Vec::new();
|
||||
let mut to_arrange = Vec::new();
|
||||
|
||||
// TODO(discord9): handling tokio broadcast error
|
||||
while let Ok((r, t, d)) = src_recv.try_recv() {
|
||||
if t <= now {
|
||||
@@ -72,7 +72,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
|
||||
let all = prev_avail.chain(to_send).collect_vec();
|
||||
if !all.is_empty() || !to_arrange.is_empty() {
|
||||
debug!(
|
||||
"All send: {} rows, not yet send: {} rows",
|
||||
"Rendered Source All send: {} rows, not yet send: {} rows",
|
||||
all.len(),
|
||||
to_arrange.len()
|
||||
);
|
||||
|
||||
@@ -42,6 +42,8 @@ pub struct DataflowState {
|
||||
/// save all arrangements used in this dataflow; since there is usually no delete operation,
/// we can just keep track of every used arrangement and schedule its subgraph when it needs to be updated
arrange_used: Vec<ArrangeHandler>,
/// the time (in milliseconds) after which entries in the arrangement need to be expired
expire_after: Option<Timestamp>,
|
||||
}
|
||||
|
||||
impl DataflowState {
|
||||
@@ -99,6 +101,14 @@ impl DataflowState {
|
||||
pub fn get_err_collector(&self) -> ErrCollector {
|
||||
self.err_collector.clone()
|
||||
}
|
||||
|
||||
pub fn set_expire_after(&mut self, after: Option<repr::Duration>) {
|
||||
self.expire_after = after;
|
||||
}
|
||||
|
||||
pub fn expire_after(&self) -> Option<Timestamp> {
|
||||
self.expire_after
|
||||
}
|
||||
}
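// A minimal usage sketch of the new expiration knobs above (illustrative, not part of this patch),
// assuming `repr::Duration` and `repr::Timestamp` are both i64 millisecond counts, as the
// surrounding code suggests:
//
//     let mut state = DataflowState::default();
//     state.set_expire_after(Some(3_600_000)); // arrangement entries may expire after one hour
//     assert_eq!(state.expire_after(), Some(3_600_000));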
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@@ -153,6 +153,9 @@ pub struct ErrCollector {
|
||||
}
|
||||
|
||||
impl ErrCollector {
|
||||
pub fn get_all_blocking(&self) -> Vec<EvalError> {
|
||||
self.inner.blocking_lock().drain(..).collect_vec()
|
||||
}
|
||||
pub async fn get_all(&self) -> Vec<EvalError> {
|
||||
self.inner.lock().await.drain(..).collect_vec()
|
||||
}
|
||||
|
||||
@@ -17,8 +17,10 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::debug;
|
||||
use common_time::DateTime;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{DateTime, Timestamp};
|
||||
use datafusion_expr::Operator;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::types::cast;
|
||||
@@ -30,14 +32,14 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
use substrait::df_logical_plan::consumer::name_to_op;
|
||||
|
||||
use crate::adapter::error::{Error, InvalidQuerySnafu, PlanSnafu};
|
||||
use crate::adapter::error::{Error, ExternalSnafu, InvalidQuerySnafu, PlanSnafu};
|
||||
use crate::expr::error::{
|
||||
CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, TryFromValueSnafu,
|
||||
TypeMismatchSnafu,
|
||||
CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, OverflowSnafu,
|
||||
TryFromValueSnafu, TypeMismatchSnafu,
|
||||
};
|
||||
use crate::expr::signature::{GenericFn, Signature};
|
||||
use crate::expr::{InvalidArgumentSnafu, ScalarExpr};
|
||||
use crate::repr::{value_to_internal_ts, Row};
|
||||
use crate::expr::{InvalidArgumentSnafu, ScalarExpr, TypedExpr};
|
||||
use crate::repr::{self, value_to_internal_ts, Row};
|
||||
|
||||
/// UnmaterializableFunc is a function that can't be evaluated independently
/// and requires special handling
|
||||
@@ -45,6 +47,11 @@ use crate::repr::{value_to_internal_ts, Row};
|
||||
pub enum UnmaterializableFunc {
|
||||
Now,
|
||||
CurrentSchema,
|
||||
TumbleWindow {
|
||||
ts: Box<TypedExpr>,
|
||||
window_size: common_time::Interval,
|
||||
start_time: Option<DateTime>,
|
||||
},
|
||||
}
|
||||
|
||||
impl UnmaterializableFunc {
|
||||
@@ -61,14 +68,51 @@ impl UnmaterializableFunc {
|
||||
output: ConcreteDataType::string_datatype(),
|
||||
generic_fn: GenericFn::CurrentSchema,
|
||||
},
|
||||
Self::TumbleWindow { .. } => Signature {
|
||||
input: smallvec![ConcreteDataType::timestamp_millisecond_datatype()],
|
||||
output: ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
generic_fn: GenericFn::TumbleWindow,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an UnmaterializableFunc from the function name (and its arguments)
|
||||
pub fn from_str(name: &str) -> Result<Self, Error> {
|
||||
match name {
|
||||
pub fn from_str_args(name: &str, args: Vec<TypedExpr>) -> Result<Self, Error> {
|
||||
match name.to_lowercase().as_str() {
|
||||
"now" => Ok(Self::Now),
|
||||
"current_schema" => Ok(Self::CurrentSchema),
|
||||
"tumble" => {
|
||||
let ts = args.first().context(InvalidQuerySnafu {
|
||||
reason: "Tumble window function requires a timestamp argument",
|
||||
})?;
|
||||
let window_size = args
|
||||
.get(1)
|
||||
.and_then(|expr| expr.expr.as_literal())
|
||||
.context(InvalidQuerySnafu {
|
||||
reason: "Tumble window function requires a window size argument"
|
||||
})?.as_string() // TODO(discord9): since df to substrait convertor does not support interval type yet, we need to take a string and cast it to interval instead
|
||||
.map(|s|cast(Value::from(s), &ConcreteDataType::interval_month_day_nano_datatype())).transpose().map_err(BoxedError::new).context(
|
||||
ExternalSnafu
|
||||
)?.and_then(|v|v.as_interval())
|
||||
.with_context(||InvalidQuerySnafu {
|
||||
reason: format!("Tumble window function requires window size argument to be a string describe a interval, found {:?}", args.get(1))
|
||||
})?;
|
||||
let start_time = match args.get(2) {
|
||||
Some(start_time) => start_time.expr.as_literal(),
|
||||
None => None,
|
||||
}
|
||||
.map(|s| cast(s.clone(), &ConcreteDataType::datetime_datatype())).transpose().map_err(BoxedError::new).context(ExternalSnafu)?.map(|v|v.as_datetime().with_context(
|
||||
||InvalidQuerySnafu {
|
||||
reason: format!("Tumble window function requires start time argument to be a datetime describe in string, found {:?}", args.get(2))
|
||||
}
|
||||
)).transpose()?;
|
||||
|
||||
Ok(Self::TumbleWindow {
|
||||
ts: Box::new(ts.clone()),
|
||||
window_size,
|
||||
start_time,
|
||||
})
|
||||
}
|
||||
_ => InvalidQuerySnafu {
|
||||
reason: format!("Unknown unmaterializable function: {}", name),
|
||||
}
|
||||
@@ -87,6 +131,14 @@ pub enum UnaryFunc {
|
||||
IsFalse,
|
||||
StepTimestamp,
|
||||
Cast(ConcreteDataType),
|
||||
TumbleWindowFloor {
|
||||
window_size: common_time::Interval,
|
||||
start_time: Option<DateTime>,
|
||||
},
|
||||
TumbleWindowCeiling {
|
||||
window_size: common_time::Interval,
|
||||
start_time: Option<DateTime>,
|
||||
},
|
||||
}
|
||||
|
||||
impl UnaryFunc {
|
||||
@@ -118,6 +170,16 @@ impl UnaryFunc {
|
||||
output: to.clone(),
|
||||
generic_fn: GenericFn::Cast,
|
||||
},
|
||||
Self::TumbleWindowFloor { .. } => Signature {
|
||||
input: smallvec![ConcreteDataType::timestamp_millisecond_datatype()],
|
||||
output: ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
generic_fn: GenericFn::TumbleWindow,
|
||||
},
|
||||
Self::TumbleWindowCeiling { .. } => Signature {
|
||||
input: smallvec![ConcreteDataType::timestamp_millisecond_datatype()],
|
||||
output: ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
generic_fn: GenericFn::TumbleWindow,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,10 +273,51 @@ impl UnaryFunc {
|
||||
debug!("Cast to type: {to:?}, result: {:?}", res);
|
||||
res
|
||||
}
|
||||
Self::TumbleWindowFloor {
|
||||
window_size,
|
||||
start_time,
|
||||
} => {
|
||||
let ts = get_ts_as_millisecond(arg)?;
|
||||
let start_time = start_time.map(|t| t.val()).unwrap_or(0);
|
||||
let window_size = (window_size.to_nanosecond() / 1_000_000) as repr::Duration; // nanosecond to millisecond
|
||||
let window_start = start_time + (ts - start_time) / window_size * window_size;
|
||||
|
||||
let ret = Timestamp::new_millisecond(window_start);
|
||||
Ok(Value::from(ret))
|
||||
}
|
||||
Self::TumbleWindowCeiling {
|
||||
window_size,
|
||||
start_time,
|
||||
} => {
|
||||
let ts = get_ts_as_millisecond(arg)?;
|
||||
let start_time = start_time.map(|t| t.val()).unwrap_or(0);
|
||||
let window_size = (window_size.to_nanosecond() / 1_000_000) as repr::Duration; // nanosecond to millisecond
|
||||
let window_start = start_time + (ts - start_time) / window_size * window_size;
|
||||
|
||||
let window_end = window_start + window_size;
|
||||
let ret = Timestamp::new_millisecond(window_end);
|
||||
Ok(Value::from(ret))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ts_as_millisecond(arg: Value) -> Result<repr::Timestamp, EvalError> {
|
||||
let ts = if let Some(ts) = arg.as_timestamp() {
|
||||
ts.convert_to(TimeUnit::Millisecond)
|
||||
.context(OverflowSnafu)?
|
||||
.value()
|
||||
} else if let Some(ts) = arg.as_datetime() {
|
||||
ts.val()
|
||||
} else {
|
||||
InvalidArgumentSnafu {
|
||||
reason: "Expect input to be timestamp or datetime type",
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
Ok(ts)
|
||||
}
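// The window arithmetic above is plain integer bucketing in milliseconds. A minimal sketch of the
// same computation as free functions (illustrative only; the helpers below are not part of this patch):
fn tumble_floor(ts: i64, start_time: i64, window_size: i64) -> i64 {
    // assumes ts >= start_time and window_size > 0, all in milliseconds
    start_time + (ts - start_time) / window_size * window_size
}

fn tumble_ceiling(ts: i64, start_time: i64, window_size: i64) -> i64 {
    tumble_floor(ts, start_time, window_size) + window_size
}

// e.g. with a 1-second window starting at 1625097600000:
//   tumble_floor(1625097601500, 1625097600000, 1000) == 1625097601000
//   tumble_ceiling(1625097601500, 1625097600000, 1000) == 1625097602000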
|
||||
|
||||
/// BinaryFunc is a function that takes two arguments.
|
||||
/// Also notice this enum doesn't contain function arguments, since the arguments are stored in the expression.
|
||||
///
|
||||
@@ -375,6 +478,22 @@ impl BinaryFunc {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn add(input_type: ConcreteDataType) -> Result<Self, Error> {
|
||||
Self::specialization(GenericFn::Add, input_type)
|
||||
}
|
||||
|
||||
pub fn sub(input_type: ConcreteDataType) -> Result<Self, Error> {
|
||||
Self::specialization(GenericFn::Sub, input_type)
|
||||
}
|
||||
|
||||
pub fn mul(input_type: ConcreteDataType) -> Result<Self, Error> {
|
||||
Self::specialization(GenericFn::Mul, input_type)
|
||||
}
|
||||
|
||||
pub fn div(input_type: ConcreteDataType) -> Result<Self, Error> {
|
||||
Self::specialization(GenericFn::Div, input_type)
|
||||
}
|
||||
|
||||
/// Get the specialization of the binary function based on the generic function and the input type
|
||||
pub fn specialization(generic: GenericFn, input_type: ConcreteDataType) -> Result<Self, Error> {
|
||||
let rule = SPECIALIZATION.get_or_init(|| {
|
||||
|
||||
@@ -136,27 +136,44 @@ impl AggregateFunc {
|
||||
|
||||
/// Generate signature for each aggregate function
|
||||
macro_rules! generate_signature {
|
||||
($value:ident, { $($user_arm:tt)* },
|
||||
[ $(
|
||||
$auto_arm:ident=>($con_type:ident,$generic:ident)
|
||||
),*
|
||||
]) => {
|
||||
($value:ident,
|
||||
{ $($user_arm:tt)* },
|
||||
[ $(
|
||||
$auto_arm:ident=>($($arg:ident),*)
|
||||
),*
|
||||
]
|
||||
) => {
|
||||
match $value {
|
||||
$($user_arm)*,
|
||||
$(
|
||||
Self::$auto_arm => Signature {
|
||||
input: smallvec![
|
||||
ConcreteDataType::$con_type(),
|
||||
ConcreteDataType::$con_type(),
|
||||
],
|
||||
output: ConcreteDataType::$con_type(),
|
||||
generic_fn: GenericFn::$generic,
|
||||
},
|
||||
Self::$auto_arm => gen_one_siginature!($($arg),*),
|
||||
)*
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Generate one match arm with optional arguments
|
||||
macro_rules! gen_one_siginature {
|
||||
(
|
||||
$con_type:ident, $generic:ident
|
||||
) => {
|
||||
Signature {
|
||||
input: smallvec![ConcreteDataType::$con_type(), ConcreteDataType::$con_type(),],
|
||||
output: ConcreteDataType::$con_type(),
|
||||
generic_fn: GenericFn::$generic,
|
||||
}
|
||||
};
|
||||
(
|
||||
$in_type:ident, $out_type:ident, $generic:ident
|
||||
) => {
|
||||
Signature {
|
||||
input: smallvec![ConcreteDataType::$in_type()],
|
||||
output: ConcreteDataType::$out_type(),
|
||||
generic_fn: GenericFn::$generic,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static SPECIALIZATION: OnceLock<HashMap<(GenericFn, ConcreteDataType), AggregateFunc>> =
|
||||
OnceLock::new();
|
||||
|
||||
@@ -223,6 +240,8 @@ impl AggregateFunc {
|
||||
|
||||
/// all concrete datatypes with precision types will be returned with the largest possible variant;
/// as an exception, count has a signature of `null -> i64`, but it's actually `anytype -> i64`
///
/// TODO(discord9): fix signature for sum: unsigned -> u64, signed -> i64
|
||||
pub fn signature(&self) -> Signature {
|
||||
generate_signature!(self, {
|
||||
AggregateFunc::Count => Signature {
|
||||
@@ -263,12 +282,12 @@ impl AggregateFunc {
|
||||
MinTime => (time_second_datatype, Min),
|
||||
MinDuration => (duration_second_datatype, Min),
|
||||
MinInterval => (interval_year_month_datatype, Min),
|
||||
SumInt16 => (int16_datatype, Sum),
|
||||
SumInt32 => (int32_datatype, Sum),
|
||||
SumInt64 => (int64_datatype, Sum),
|
||||
SumUInt16 => (uint16_datatype, Sum),
|
||||
SumUInt32 => (uint32_datatype, Sum),
|
||||
SumUInt64 => (uint64_datatype, Sum),
|
||||
SumInt16 => (int16_datatype, int64_datatype, Sum),
|
||||
SumInt32 => (int32_datatype, int64_datatype, Sum),
|
||||
SumInt64 => (int64_datatype, int64_datatype, Sum),
|
||||
SumUInt16 => (uint16_datatype, uint64_datatype, Sum),
|
||||
SumUInt32 => (uint32_datatype, uint64_datatype, Sum),
|
||||
SumUInt64 => (uint64_datatype, uint64_datatype, Sum),
|
||||
SumFloat32 => (float32_datatype, Sum),
|
||||
SumFloat64 => (float64_datatype, Sum),
|
||||
Any => (boolean_datatype, Any),
|
||||
|
||||
@@ -26,10 +26,10 @@ use crate::adapter::error::{
|
||||
};
|
||||
use crate::expr::error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
|
||||
use crate::expr::func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
|
||||
use crate::repr::ColumnType;
|
||||
use crate::repr::{ColumnType, RelationType};
|
||||
|
||||
/// A scalar expression with a known type.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)]
|
||||
pub struct TypedExpr {
|
||||
/// The expression.
|
||||
pub expr: ScalarExpr,
|
||||
@@ -43,6 +43,73 @@ impl TypedExpr {
|
||||
}
|
||||
}
|
||||
|
||||
impl TypedExpr {
|
||||
/// expand multi-value expression to multiple expressions with new indices
|
||||
pub fn expand_multi_value(
|
||||
input_typ: &RelationType,
|
||||
exprs: &[TypedExpr],
|
||||
) -> Result<Vec<TypedExpr>, Error> {
|
||||
// old indices in mfp, expanded expr
|
||||
let mut ret = vec![];
|
||||
let input_arity = input_typ.column_types.len();
|
||||
for (old_idx, expr) in exprs.iter().enumerate() {
|
||||
if let ScalarExpr::CallUnmaterializable(UnmaterializableFunc::TumbleWindow {
|
||||
ts,
|
||||
window_size,
|
||||
start_time,
|
||||
}) = &expr.expr
|
||||
{
|
||||
let floor = UnaryFunc::TumbleWindowFloor {
|
||||
window_size: *window_size,
|
||||
start_time: *start_time,
|
||||
};
|
||||
let ceil = UnaryFunc::TumbleWindowCeiling {
|
||||
window_size: *window_size,
|
||||
start_time: *start_time,
|
||||
};
|
||||
let floor = ScalarExpr::CallUnary {
|
||||
func: floor,
|
||||
expr: Box::new(ts.expr.clone()),
|
||||
}
|
||||
.with_type(ts.typ.clone());
|
||||
ret.push((None, floor));
|
||||
|
||||
let ceil = ScalarExpr::CallUnary {
|
||||
func: ceil,
|
||||
expr: Box::new(ts.expr.clone()),
|
||||
}
|
||||
.with_type(ts.typ.clone());
|
||||
ret.push((None, ceil));
|
||||
} else {
|
||||
ret.push((Some(input_arity + old_idx), expr.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
// get the shuffled index (old_idx -> new_idx);
// note the index is offset by input_arity because an mfp is laid out to first include the input columns, then the intermediate columns
|
||||
let shuffle = ret
|
||||
.iter()
|
||||
.map(|(old_idx, _)| *old_idx) // [Option<opt_idx>]
|
||||
.enumerate()
|
||||
.map(|(new, old)| (old, new + input_arity))
|
||||
.flat_map(|(old, new)| old.map(|o| (o, new)))
|
||||
.chain((0..input_arity).map(|i| (i, i))) // also remember to chain the input columns as not changed
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
|
||||
// shuffle expr's index
|
||||
let exprs = ret
|
||||
.into_iter()
|
||||
.map(|(_, mut expr)| {
|
||||
// invariant: it is expected that no expr will refer to the column being expanded
|
||||
expr.expr.permute_map(&shuffle)?;
|
||||
Ok(expr)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
Ok(dbg!(exprs))
|
||||
}
|
||||
}
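// Worked example of the index shuffling above (illustrative, not from this patch): with
// input_arity = 2 and exprs = [tumble(Column(1), ..), Column(0)], the tumble expression expands
// into its floor/ceiling parts, which land at new indices 2 and 3, while Column(0) (which would
// have landed at intermediate index 2 + 1 = 3 before expansion) is remapped to index 4; input
// columns 0 and 1 keep their positions.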
|
||||
|
||||
/// A scalar expression, which can be evaluated to a value.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum ScalarExpr {
|
||||
@@ -83,6 +150,10 @@ pub enum ScalarExpr {
|
||||
}
|
||||
|
||||
impl ScalarExpr {
|
||||
pub fn with_type(self, typ: ColumnType) -> TypedExpr {
|
||||
TypedExpr::new(self, typ)
|
||||
}
|
||||
|
||||
/// try to determine the type of the expression
|
||||
pub fn typ(&self, context: &[ColumnType]) -> Result<ColumnType, Error> {
|
||||
match self {
|
||||
|
||||
@@ -64,4 +64,5 @@ pub enum GenericFn {
|
||||
// unmaterializable func
|
||||
Now,
|
||||
CurrentSchema,
|
||||
TumbleWindow,
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
//! It also contains definition of expression, adapter and plan, and internal state management.
|
||||
|
||||
#![feature(let_chains)]
|
||||
#![feature(duration_abs_diff)]
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_imports)]
|
||||
#![warn(missing_docs)]
|
||||
@@ -30,3 +31,5 @@ mod plan;
|
||||
mod repr;
|
||||
mod transform;
|
||||
mod utils;
|
||||
|
||||
pub use adapter::{FlownodeBuilder, FlownodeManager, FlownodeManagerRef, FlownodeOptions};
|
||||
|
||||
@@ -44,7 +44,7 @@ pub struct TypedPlan {
|
||||
impl TypedPlan {
|
||||
/// directly apply an mfp to the plan
|
||||
pub fn mfp(self, mfp: MapFilterProject) -> Result<Self, Error> {
|
||||
let new_type = self.typ.apply_mfp(&mfp, &[])?;
|
||||
let new_type = self.typ.apply_mfp(&mfp)?;
|
||||
let plan = match self.plan {
|
||||
Plan::Mfp {
|
||||
input,
|
||||
@@ -68,14 +68,14 @@ impl TypedPlan {
|
||||
pub fn projection(self, exprs: Vec<TypedExpr>) -> Result<Self, Error> {
|
||||
let input_arity = self.typ.column_types.len();
|
||||
let output_arity = exprs.len();
|
||||
let (exprs, expr_typs): (Vec<_>, Vec<_>) = exprs
|
||||
let (exprs, _expr_typs): (Vec<_>, Vec<_>) = exprs
|
||||
.into_iter()
|
||||
.map(|TypedExpr { expr, typ }| (expr, typ))
|
||||
.unzip();
|
||||
let mfp = MapFilterProject::new(input_arity)
|
||||
.map(exprs)?
|
||||
.project(input_arity..input_arity + output_arity)?;
|
||||
let out_typ = self.typ.apply_mfp(&mfp, &expr_typs)?;
|
||||
let out_typ = self.typ.apply_mfp(&mfp)?;
|
||||
// special case for mfp to compose when the plan is already mfp
|
||||
let plan = match self.plan {
|
||||
Plan::Mfp {
|
||||
|
||||
@@ -111,13 +111,13 @@ impl RelationType {
|
||||
/// then new key=`[1]`, new time index=`[0]`
|
||||
///
|
||||
/// note that this function will remove empty keys, e.g. key=`[]`
|
||||
pub fn apply_mfp(&self, mfp: &MapFilterProject, expr_typs: &[ColumnType]) -> Result<Self> {
|
||||
let all_types = self
|
||||
.column_types
|
||||
.iter()
|
||||
.chain(expr_typs.iter())
|
||||
.cloned()
|
||||
.collect_vec();
|
||||
pub fn apply_mfp(&self, mfp: &MapFilterProject) -> Result<Self> {
|
||||
let mut all_types = self.column_types.clone();
|
||||
for expr in &mfp.expressions {
|
||||
let expr_typ = expr.typ(&self.column_types)?;
|
||||
all_types.push(expr_typ);
|
||||
}
|
||||
let all_types = all_types;
|
||||
let mfp_out_types = mfp
|
||||
.projection
|
||||
.iter()
|
||||
@@ -131,6 +131,7 @@ impl RelationType {
|
||||
})
|
||||
})
|
||||
.try_collect()?;
|
||||
|
||||
let old_to_new_col = BTreeMap::from_iter(
|
||||
mfp.projection
|
||||
.clone()
|
||||
@@ -205,6 +206,15 @@ impl RelationType {
|
||||
self
|
||||
}
|
||||
|
||||
/// will also remove the time index from the keys if it appears among them
|
||||
pub fn with_time_index(mut self, time_index: Option<usize>) -> Self {
|
||||
self.time_index = time_index;
|
||||
for key in &mut self.keys {
|
||||
key.remove_col(time_index.unwrap_or(usize::MAX));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Computes the number of columns in the relation.
|
||||
pub fn arity(&self) -> usize {
|
||||
self.column_types.len()
|
||||
|
||||
@@ -130,12 +130,60 @@ pub async fn sql_to_flow_plan(
|
||||
Ok(flow_plan)
|
||||
}
|
||||
|
||||
/// register flow-specific functions to the query engine
|
||||
pub fn register_function_to_query_engine(engine: &Arc<dyn QueryEngine>) {
|
||||
engine.register_function(Arc::new(TumbleFunction {}));
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TumbleFunction {}
|
||||
|
||||
const TUMBLE_NAME: &str = "tumble";
|
||||
|
||||
impl std::fmt::Display for TumbleFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{}", TUMBLE_NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
impl common_function::function::Function for TumbleFunction {
|
||||
fn name(&self) -> &str {
|
||||
TUMBLE_NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[CDT]) -> common_query::error::Result<CDT> {
|
||||
Ok(CDT::datetime_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> common_query::prelude::Signature {
|
||||
common_query::prelude::Signature::variadic_any(common_query::prelude::Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(
|
||||
&self,
|
||||
_func_ctx: common_function::function::FunctionContext,
|
||||
_columns: &[datatypes::prelude::VectorRef],
|
||||
) -> common_query::error::Result<datatypes::prelude::VectorRef> {
|
||||
UnexpectedSnafu {
|
||||
reason: "Tumbler function is not implemented for datafusion executor",
|
||||
}
|
||||
.fail()
|
||||
.map_err(BoxedError::new)
|
||||
.context(common_query::error::ExecuteSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::RegisterTableRequest;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_ID};
|
||||
use common_time::{Date, DateTime};
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::Schema;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use itertools::Itertools;
|
||||
use prost::Message;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::plan::LogicalPlan;
|
||||
@@ -144,23 +192,45 @@ mod test {
|
||||
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
use substrait_proto::proto;
|
||||
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
|
||||
use table::test_util::MemTable;
|
||||
|
||||
use super::*;
|
||||
use crate::adapter::node_context::IdToNameMap;
|
||||
use crate::repr::ColumnType;
|
||||
|
||||
pub fn create_test_ctx() -> FlownodeContext {
|
||||
let gid = GlobalId::User(0);
|
||||
let name = [
|
||||
"greptime".to_string(),
|
||||
"public".to_string(),
|
||||
"numbers".to_string(),
|
||||
];
|
||||
let schema = RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), false)]);
|
||||
let mut schemas = HashMap::new();
|
||||
let mut tri_map = IdToNameMap::new();
|
||||
tri_map.insert(Some(name.clone()), Some(0), gid);
|
||||
{
|
||||
let gid = GlobalId::User(0);
|
||||
let name = [
|
||||
"greptime".to_string(),
|
||||
"public".to_string(),
|
||||
"numbers".to_string(),
|
||||
];
|
||||
let schema = RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), false)]);
|
||||
|
||||
tri_map.insert(Some(name.clone()), Some(1024), gid);
|
||||
schemas.insert(gid, schema);
|
||||
}
|
||||
|
||||
{
|
||||
let gid = GlobalId::User(1);
|
||||
let name = [
|
||||
"greptime".to_string(),
|
||||
"public".to_string(),
|
||||
"numbers_with_ts".to_string(),
|
||||
];
|
||||
let schema = RelationType::new(vec![
|
||||
ColumnType::new(CDT::uint32_datatype(), false),
|
||||
ColumnType::new(CDT::datetime_datatype(), false),
|
||||
]);
|
||||
schemas.insert(gid, schema);
|
||||
tri_map.insert(Some(name.clone()), Some(1025), gid);
|
||||
}
|
||||
|
||||
FlownodeContext {
|
||||
schema: HashMap::from([(gid, schema)]),
|
||||
schema: schemas,
|
||||
table_repr: tri_map,
|
||||
query_context: Some(Arc::new(QueryContext::with("greptime", "public"))),
|
||||
..Default::default()
|
||||
@@ -177,9 +247,37 @@ mod test {
|
||||
table: NumbersTable::table(NUMBERS_TABLE_ID),
|
||||
};
|
||||
catalog_list.register_table_sync(req).unwrap();
|
||||
|
||||
let schema = vec![
|
||||
datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
|
||||
datatypes::schema::ColumnSchema::new("ts", CDT::datetime_datatype(), false),
|
||||
];
|
||||
let mut columns = vec![];
|
||||
let numbers = (1..=10).collect_vec();
|
||||
let column: VectorRef = Arc::new(<u32 as Scalar>::VectorType::from_vec(numbers));
|
||||
columns.push(column);
|
||||
|
||||
let ts = (1..=10).collect_vec();
|
||||
let column: VectorRef = Arc::new(<DateTime as Scalar>::VectorType::from_vec(ts));
|
||||
columns.push(column);
|
||||
|
||||
let schema = Arc::new(Schema::new(schema));
|
||||
let recordbatch = common_recordbatch::RecordBatch::new(schema, columns).unwrap();
|
||||
let table = MemTable::table("numbers_with_ts", recordbatch);
|
||||
|
||||
let req_with_ts = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "numbers_with_ts".to_string(),
|
||||
table_id: 1024,
|
||||
table,
|
||||
};
|
||||
catalog_list.register_table_sync(req_with_ts).unwrap();
|
||||
|
||||
let factory = query::QueryEngineFactory::new(catalog_list, None, None, None, false);
|
||||
|
||||
let engine = factory.query_engine();
|
||||
engine.register_function(Arc::new(TumbleFunction {}));
|
||||
|
||||
assert_eq!("datafusion", engine.name());
|
||||
engine
|
||||
|
||||
@@ -12,13 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
use common_decimal::Decimal128;
|
||||
use common_time::{Date, Timestamp};
|
||||
use datatypes::arrow::compute::kernels::window;
|
||||
use datatypes::arrow::ipc::Binary;
|
||||
use datatypes::data_type::ConcreteDataType as CDT;
|
||||
use datatypes::data_type::{ConcreteDataType as CDT, DataType};
|
||||
use datatypes::value::Value;
|
||||
use hydroflow::futures::future::Map;
|
||||
use itertools::Itertools;
|
||||
@@ -83,14 +83,18 @@ impl TypedExpr {
|
||||
}
|
||||
|
||||
impl AggregateExpr {
|
||||
/// Convert list of `Measure` into Flow's AggregateExpr
|
||||
///
|
||||
/// Return both the AggregateExprs and a MapFilterProject that produces the final output of the aggregate functions
|
||||
fn from_substrait_agg_measures(
|
||||
ctx: &mut FlownodeContext,
|
||||
measures: &[Measure],
|
||||
typ: &RelationType,
|
||||
extensions: &FunctionExtensions,
|
||||
) -> Result<Vec<AggregateExpr>, Error> {
|
||||
) -> Result<(Vec<AggregateExpr>, MapFilterProject), Error> {
|
||||
let _ = ctx;
|
||||
let mut aggr_exprs = vec![];
|
||||
let mut all_aggr_exprs = vec![];
|
||||
let mut post_maps = vec![];
|
||||
|
||||
for m in measures {
|
||||
let filter = &m
|
||||
@@ -99,7 +103,7 @@ impl AggregateExpr {
|
||||
.map(|fil| TypedExpr::from_substrait_rex(fil, typ, extensions))
|
||||
.transpose()?;
|
||||
|
||||
let agg_func = match &m.measure {
|
||||
let (aggr_expr, post_mfp) = match &m.measure {
|
||||
Some(f) => {
|
||||
let distinct = match f.invocation {
|
||||
_ if f.invocation == AggregationInvocation::Distinct as i32 => true,
|
||||
@@ -113,12 +117,30 @@ impl AggregateExpr {
|
||||
}
|
||||
None => not_impl_err!("Aggregate without aggregate function is not supported"),
|
||||
}?;
|
||||
aggr_exprs.push(agg_func);
|
||||
// permute the column indices to refer to the output of post_mfp,
// so as to help construct an mfp at the end
|
||||
let mut post_map = post_mfp.unwrap_or(ScalarExpr::Column(0));
|
||||
let cur_arity = all_aggr_exprs.len();
|
||||
let remap = (0..aggr_expr.len()).map(|i| i + cur_arity).collect_vec();
|
||||
post_map.permute(&remap)?;
|
||||
|
||||
all_aggr_exprs.extend(aggr_expr);
|
||||
post_maps.push(post_map);
|
||||
}
|
||||
Ok(aggr_exprs)
|
||||
|
||||
let input_arity = all_aggr_exprs.len();
|
||||
let aggr_arity = post_maps.len();
|
||||
let post_mfp_final = MapFilterProject::new(all_aggr_exprs.len())
|
||||
.map(post_maps)?
|
||||
.project(input_arity..input_arity + aggr_arity)?;
|
||||
|
||||
Ok((all_aggr_exprs, post_mfp_final))
|
||||
}
|
||||
|
||||
/// Convert AggregateFunction into Flow's AggregateExpr
|
||||
///
|
||||
/// the returned value is a tuple of AggregateExprs and an optional ScalarExpr that, if present, is the final output of the aggregate function,
/// since aggr functions like `avg` need to be transformed into `sum(x)/cast(count(x) as x_type)`
|
||||
pub fn from_substrait_agg_func(
|
||||
f: &proto::AggregateFunction,
|
||||
input_schema: &RelationType,
|
||||
@@ -126,7 +148,7 @@ impl AggregateExpr {
|
||||
filter: &Option<TypedExpr>,
|
||||
order_by: &Option<Vec<TypedExpr>>,
|
||||
distinct: bool,
|
||||
) -> Result<AggregateExpr, Error> {
|
||||
) -> Result<(Vec<AggregateExpr>, Option<ScalarExpr>), Error> {
|
||||
// TODO(discord9): impl filter
|
||||
let _ = filter;
|
||||
let _ = order_by;
|
||||
@@ -141,26 +163,74 @@ impl AggregateExpr {
|
||||
args.push(arg_expr);
|
||||
}
|
||||
|
||||
if args.len() != 1 {
|
||||
return not_impl_err!("Aggregated function with multiple arguments is not supported");
|
||||
}
|
||||
|
||||
let arg = if let Some(first) = args.first() {
|
||||
first
|
||||
} else {
|
||||
return not_impl_err!("Aggregated function without arguments is not supported");
|
||||
};
|
||||
|
||||
let func = match extensions.get(&f.function_reference) {
|
||||
let fn_name = extensions
|
||||
.get(&f.function_reference)
|
||||
.cloned()
|
||||
.map(|s| s.to_lowercase());
|
||||
|
||||
match fn_name.as_ref().map(|s| s.as_ref()) {
|
||||
Some(Self::AVG_NAME) => AggregateExpr::from_avg_aggr_func(arg),
|
||||
Some(function_name) => {
|
||||
AggregateFunc::from_str_and_type(function_name, Some(arg.typ.scalar_type.clone()))
|
||||
let func = AggregateFunc::from_str_and_type(
|
||||
function_name,
|
||||
Some(arg.typ.scalar_type.clone()),
|
||||
)?;
|
||||
let exprs = vec![AggregateExpr {
|
||||
func,
|
||||
expr: arg.expr.clone(),
|
||||
distinct,
|
||||
}];
|
||||
let ret_mfp = None;
|
||||
Ok((exprs, ret_mfp))
|
||||
}
|
||||
None => not_impl_err!(
|
||||
"Aggregated function not found: function anchor = {:?}",
|
||||
f.function_reference
|
||||
),
|
||||
}?;
|
||||
Ok(AggregateExpr {
|
||||
func,
|
||||
}
|
||||
}
|
||||
const AVG_NAME: &'static str = "avg";
|
||||
/// convert `avg` function into `sum(x)/cast(count(x) as x_type)`
|
||||
fn from_avg_aggr_func(
|
||||
arg: &TypedExpr,
|
||||
) -> Result<(Vec<AggregateExpr>, Option<ScalarExpr>), Error> {
|
||||
let arg_type = arg.typ.scalar_type.clone();
|
||||
let sum = AggregateExpr {
|
||||
func: AggregateFunc::from_str_and_type("sum", Some(arg_type.clone()))?,
|
||||
expr: arg.expr.clone(),
|
||||
distinct,
|
||||
})
|
||||
distinct: false,
|
||||
};
|
||||
let sum_out_type = sum.func.signature().output.clone();
|
||||
let count = AggregateExpr {
|
||||
func: AggregateFunc::Count,
|
||||
expr: arg.expr.clone(),
|
||||
distinct: false,
|
||||
};
|
||||
let count_out_type = count.func.signature().output.clone();
|
||||
let avg_output = ScalarExpr::Column(0).call_binary(
|
||||
ScalarExpr::Column(1).call_unary(UnaryFunc::Cast(sum_out_type.clone())),
|
||||
BinaryFunc::div(sum_out_type.clone())?,
|
||||
);
|
||||
// make sure we wouldn't divide by zero
|
||||
let zero = ScalarExpr::literal(count_out_type.default_value(), count_out_type.clone());
|
||||
let non_zero = ScalarExpr::If {
|
||||
cond: Box::new(ScalarExpr::Column(1).call_binary(zero.clone(), BinaryFunc::NotEq)),
|
||||
then: Box::new(avg_output),
|
||||
els: Box::new(ScalarExpr::literal(Value::Null, sum_out_type.clone())),
|
||||
};
|
||||
let ret_aggr_exprs = vec![sum, count];
|
||||
let ret_mfp = Some(non_zero);
|
||||
Ok((ret_aggr_exprs, ret_mfp))
|
||||
}
|
||||
}
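// Rough illustration of what the `avg` rewrite above produces, written as the equivalent
// SQL-level expression (not part of this patch):
//
//   avg(x)  =>  CASE WHEN count(x) != 0 THEN sum(x) / CAST(count(x) AS <sum output type>) ELSE NULL END
//
// i.e. the plan keeps `sum` and `count` as two accumulable aggregates and applies the guarded
// division afterwards in a MapFilterProject, which is exactly the (aggr_exprs, post_mfp) pair
// returned by `from_avg_aggr_func`.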
|
||||
|
||||
@@ -217,6 +287,10 @@ impl KeyValPlan {
|
||||
|
||||
impl TypedPlan {
|
||||
/// Convert AggregateRel into Flow's TypedPlan
|
||||
///
|
||||
/// The output of aggr plan is:
|
||||
///
|
||||
/// <group_exprs>..<aggr_exprs>
|
||||
pub fn from_substrait_agg_rel(
|
||||
ctx: &mut FlownodeContext,
|
||||
agg: &proto::AggregateRel,
|
||||
@@ -228,10 +302,28 @@ impl TypedPlan {
|
||||
return not_impl_err!("Aggregate without an input is not supported");
|
||||
};
|
||||
|
||||
let group_exprs =
|
||||
TypedExpr::from_substrait_agg_grouping(ctx, &agg.groupings, &input.typ, extensions)?;
|
||||
let group_exprs = {
|
||||
let group_exprs = TypedExpr::from_substrait_agg_grouping(
|
||||
ctx,
|
||||
&agg.groupings,
|
||||
&input.typ,
|
||||
extensions,
|
||||
)?;
|
||||
|
||||
let mut aggr_exprs =
|
||||
TypedExpr::expand_multi_value(&input.typ, &group_exprs)?
|
||||
};
|
||||
|
||||
let time_index = group_exprs.iter().position(|expr| {
|
||||
matches!(
|
||||
&expr.expr,
|
||||
ScalarExpr::CallUnary {
|
||||
func: UnaryFunc::TumbleWindowFloor { .. },
|
||||
expr: _
|
||||
}
|
||||
)
|
||||
});
|
||||
|
||||
let (mut aggr_exprs, post_mfp) =
|
||||
AggregateExpr::from_substrait_agg_measures(ctx, &agg.measures, &input.typ, extensions)?;
|
||||
|
||||
let key_val_plan = KeyValPlan::from_substrait_gen_key_val_plan(
|
||||
@@ -240,6 +332,7 @@ impl TypedPlan {
|
||||
input.typ.column_types.len(),
|
||||
)?;
|
||||
|
||||
// output type is group_exprs + aggr_exprs
|
||||
let output_type = {
|
||||
let mut output_types = Vec::new();
|
||||
// first append group_expr as key, then aggr_expr as value
|
||||
@@ -253,8 +346,13 @@ impl TypedPlan {
|
||||
));
|
||||
}
|
||||
// TODO(discord9): try best to get time
|
||||
RelationType::new(output_types).with_key((0..group_exprs.len()).collect_vec())
|
||||
};
|
||||
if group_exprs.is_empty() {
|
||||
RelationType::new(output_types)
|
||||
} else {
|
||||
RelationType::new(output_types).with_key((0..group_exprs.len()).collect_vec())
|
||||
}
|
||||
}
|
||||
.with_time_index(time_index);
|
||||
|
||||
// copy aggr_exprs to full_aggrs, and split them into simple_aggrs and distinct_aggrs
|
||||
// also set them input/output column
|
||||
@@ -289,15 +387,46 @@ impl TypedPlan {
|
||||
key_val_plan,
|
||||
reduce_plan: ReducePlan::Accumulable(accum_plan),
|
||||
};
|
||||
Ok(TypedPlan {
|
||||
typ: output_type,
|
||||
plan,
|
||||
})
|
||||
// FIX(discord9): deal with key first
|
||||
if post_mfp.is_identity() {
|
||||
Ok(TypedPlan {
|
||||
typ: output_type,
|
||||
plan,
|
||||
})
|
||||
} else {
|
||||
// make post_mfp map identical mapping of keys
|
||||
let input = TypedPlan {
|
||||
typ: output_type.clone(),
|
||||
plan,
|
||||
};
|
||||
let key_arity = group_exprs.len();
|
||||
let mut post_mfp = post_mfp;
|
||||
let val_arity = post_mfp.input_arity;
|
||||
// offset post_mfp's col ref by `key_arity`
|
||||
let shuffle = BTreeMap::from_iter((0..val_arity).map(|v| (v, v + key_arity)));
|
||||
let new_arity = key_arity + val_arity;
|
||||
post_mfp.permute(shuffle, new_arity)?;
|
||||
// add key projection to post mfp
|
||||
let (m, f, p) = post_mfp.into_map_filter_project();
|
||||
let p = (0..key_arity).chain(p).collect_vec();
|
||||
let post_mfp = MapFilterProject::new(new_arity)
|
||||
.map(m)?
|
||||
.filter(f)?
|
||||
.project(p)?;
|
||||
Ok(TypedPlan {
|
||||
typ: output_type.apply_mfp(&post_mfp)?,
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(input),
|
||||
mfp: post_mfp,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use common_time::{DateTime, Interval};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use pretty_assertions::{assert_eq, assert_ne};
|
||||
|
||||
@@ -306,6 +435,283 @@ mod test {
|
||||
use crate::repr::{self, ColumnType, RelationType};
|
||||
use crate::transform::test::{create_test_ctx, create_test_query_engine, sql_to_substrait};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tumble_parse() {
|
||||
let engine = create_test_query_engine();
|
||||
let sql = "SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 hour', '2021-07-01 00:00:00')";
|
||||
let plan = sql_to_substrait(engine.clone(), sql).await;
|
||||
|
||||
let mut ctx = create_test_ctx();
|
||||
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan).unwrap();
|
||||
|
||||
let aggr_expr = AggregateExpr {
|
||||
func: AggregateFunc::SumUInt32,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![
|
||||
ColumnType::new(CDT::uint64_datatype(), true), // sum(number)
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window start
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window end
|
||||
]),
|
||||
// TODO(discord9): mfp indirectly ref to key columns
|
||||
/*
|
||||
.with_key(vec![1])
|
||||
.with_time_index(Some(0)),*/
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
input: Box::new(
|
||||
Plan::Get {
|
||||
id: crate::expr::Id::Global(GlobalId::User(1)),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnType::new(ConcreteDataType::datetime_datatype(), false),
|
||||
])),
|
||||
),
|
||||
key_val_plan: KeyValPlan {
|
||||
key_plan: MapFilterProject::new(2)
|
||||
.map(vec![
|
||||
ScalarExpr::Column(1).call_unary(
|
||||
UnaryFunc::TumbleWindowFloor {
|
||||
window_size: Interval::from_month_day_nano(
|
||||
0,
|
||||
0,
|
||||
3_600_000_000_000,
|
||||
),
|
||||
start_time: Some(DateTime::new(1625097600000)),
|
||||
},
|
||||
),
|
||||
ScalarExpr::Column(1).call_unary(
|
||||
UnaryFunc::TumbleWindowCeiling {
|
||||
window_size: Interval::from_month_day_nano(
|
||||
0,
|
||||
0,
|
||||
3_600_000_000_000,
|
||||
),
|
||||
start_time: Some(DateTime::new(1625097600000)),
|
||||
},
|
||||
),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![2, 3])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
val_plan: MapFilterProject::new(2)
|
||||
.project(vec![0, 1])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
},
|
||||
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: vec![aggr_expr.clone()],
|
||||
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
}
|
||||
.with_types(
|
||||
RelationType::new(vec![
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window start
|
||||
ColumnType::new(CDT::datetime_datatype(), false), // window end
|
||||
ColumnType::new(CDT::uint64_datatype(), true), //sum(number)
|
||||
])
|
||||
.with_key(vec![1])
|
||||
.with_time_index(Some(0)),
|
||||
),
|
||||
),
|
||||
mfp: MapFilterProject::new(3)
|
||||
.map(vec![
|
||||
ScalarExpr::Column(2),
|
||||
ScalarExpr::Column(3),
|
||||
ScalarExpr::Column(0),
|
||||
ScalarExpr::Column(1),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![4, 5, 6])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
assert_eq!(flow_plan, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_avg_group_by() {
|
||||
let engine = create_test_query_engine();
|
||||
let sql = "SELECT avg(number), number FROM numbers GROUP BY number";
|
||||
let plan = sql_to_substrait(engine.clone(), sql).await;
|
||||
|
||||
let mut ctx = create_test_ctx();
|
||||
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan);
|
||||
|
||||
let aggr_exprs = vec![
|
||||
AggregateExpr {
|
||||
func: AggregateFunc::SumUInt32,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
},
|
||||
AggregateExpr {
|
||||
func: AggregateFunc::Count,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
},
|
||||
];
|
||||
let avg_expr = ScalarExpr::If {
|
||||
cond: Box::new(ScalarExpr::Column(2).call_binary(
|
||||
ScalarExpr::Literal(Value::from(0i64), CDT::int64_datatype()),
|
||||
BinaryFunc::NotEq,
|
||||
)),
|
||||
then: Box::new(ScalarExpr::Column(1).call_binary(
|
||||
ScalarExpr::Column(2).call_unary(UnaryFunc::Cast(CDT::uint64_datatype())),
|
||||
BinaryFunc::DivUInt64,
|
||||
)),
|
||||
els: Box::new(ScalarExpr::Literal(Value::Null, CDT::uint64_datatype())),
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![
|
||||
ColumnType::new(CDT::uint64_datatype(), true), // sum(number) -> u64
|
||||
ColumnType::new(CDT::uint32_datatype(), false), // number
|
||||
]),
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
input: Box::new(
|
||||
Plan::Get {
|
||||
id: crate::expr::Id::Global(GlobalId::User(0)),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
|
||||
])),
|
||||
),
|
||||
key_val_plan: KeyValPlan {
|
||||
key_plan: MapFilterProject::new(1)
|
||||
.map(vec![ScalarExpr::Column(0)])
|
||||
.unwrap()
|
||||
.project(vec![1])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
val_plan: MapFilterProject::new(1)
|
||||
.project(vec![0])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
},
|
||||
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: aggr_exprs.clone(),
|
||||
simple_aggrs: vec![
|
||||
AggrWithIndex::new(aggr_exprs[0].clone(), 0, 0),
|
||||
AggrWithIndex::new(aggr_exprs[1].clone(), 0, 1),
|
||||
],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
}
|
||||
.with_types(
|
||||
RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint32_datatype(), false), // key: number
|
||||
ColumnType::new(ConcreteDataType::uint64_datatype(), true), // sum
|
||||
ColumnType::new(ConcreteDataType::int64_datatype(), true), // count
|
||||
])
|
||||
.with_key(vec![0]),
|
||||
),
|
||||
),
|
||||
mfp: MapFilterProject::new(3)
|
||||
.map(vec![
|
||||
avg_expr, // col 3
|
||||
// TODO(discord9): optimize mfp so to remove indirect ref
|
||||
ScalarExpr::Column(3), // col 4
|
||||
ScalarExpr::Column(0), // col 5
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![4, 5])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
assert_eq!(flow_plan.unwrap(), expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_avg() {
|
||||
let engine = create_test_query_engine();
|
||||
let sql = "SELECT avg(number) FROM numbers";
|
||||
let plan = sql_to_substrait(engine.clone(), sql).await;
|
||||
|
||||
let mut ctx = create_test_ctx();
|
||||
|
||||
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan).unwrap();
|
||||
|
||||
let aggr_exprs = vec![
|
||||
AggregateExpr {
|
||||
func: AggregateFunc::SumUInt32,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
},
|
||||
AggregateExpr {
|
||||
func: AggregateFunc::Count,
|
||||
expr: ScalarExpr::Column(0),
|
||||
distinct: false,
|
||||
},
|
||||
];
|
||||
let avg_expr = ScalarExpr::If {
|
||||
cond: Box::new(ScalarExpr::Column(1).call_binary(
|
||||
ScalarExpr::Literal(Value::from(0i64), CDT::int64_datatype()),
|
||||
BinaryFunc::NotEq,
|
||||
)),
|
||||
then: Box::new(ScalarExpr::Column(0).call_binary(
|
||||
ScalarExpr::Column(1).call_unary(UnaryFunc::Cast(CDT::uint64_datatype())),
|
||||
BinaryFunc::DivUInt64,
|
||||
)),
|
||||
els: Box::new(ScalarExpr::Literal(Value::Null, CDT::uint64_datatype())),
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
input: Box::new(
|
||||
Plan::Get {
|
||||
id: crate::expr::Id::Global(GlobalId::User(0)),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
|
||||
])),
|
||||
),
|
||||
key_val_plan: KeyValPlan {
|
||||
key_plan: MapFilterProject::new(1)
|
||||
.project(vec![])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
val_plan: MapFilterProject::new(1)
|
||||
.project(vec![0])
|
||||
.unwrap()
|
||||
.into_safe(),
|
||||
},
|
||||
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
|
||||
full_aggrs: aggr_exprs.clone(),
|
||||
simple_aggrs: vec![
|
||||
AggrWithIndex::new(aggr_exprs[0].clone(), 0, 0),
|
||||
AggrWithIndex::new(aggr_exprs[1].clone(), 0, 1),
|
||||
],
|
||||
distinct_aggrs: vec![],
|
||||
}),
|
||||
}
|
||||
.with_types(RelationType::new(vec![
|
||||
ColumnType::new(ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnType::new(ConcreteDataType::int64_datatype(), true),
|
||||
])),
|
||||
),
|
||||
mfp: MapFilterProject::new(2)
|
||||
.map(vec![
|
||||
avg_expr,
|
||||
// TODO(discord9): optimize mfp so to remove indirect ref
|
||||
ScalarExpr::Column(2),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![3])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
assert_eq!(flow_plan, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_sum() {
|
||||
let engine = create_test_query_engine();
|
||||
@@ -315,7 +721,7 @@ mod test {
|
||||
let mut ctx = create_test_ctx();
|
||||
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan);
|
||||
let typ = RelationType::new(vec![ColumnType::new(
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
true,
|
||||
)]);
|
||||
let aggr_expr = AggregateExpr {
|
||||
@@ -324,7 +730,7 @@ mod test {
|
||||
distinct: false,
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), true)]),
|
||||
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
@@ -355,9 +761,9 @@ mod test {
|
||||
.with_types(typ),
|
||||
),
|
||||
mfp: MapFilterProject::new(1)
|
||||
.map(vec![ScalarExpr::Column(0)])
|
||||
.map(vec![ScalarExpr::Column(0), ScalarExpr::Column(1)])
|
||||
.unwrap()
|
||||
.project(vec![1])
|
||||
.project(vec![2])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
@@ -380,7 +786,7 @@ mod test {
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![
|
||||
ColumnType::new(CDT::uint32_datatype(), true), // col sum(number)
|
||||
ColumnType::new(CDT::uint64_datatype(), true), // col sum(number)
|
||||
ColumnType::new(CDT::uint32_datatype(), false), // col number
|
||||
]),
|
||||
plan: Plan::Mfp {
|
||||
@@ -415,15 +821,19 @@ mod test {
|
||||
.with_types(
|
||||
RelationType::new(vec![
|
||||
ColumnType::new(CDT::uint32_datatype(), false), // col number
|
||||
ColumnType::new(CDT::uint32_datatype(), true), // col sum(number)
|
||||
ColumnType::new(CDT::uint64_datatype(), true), // col sum(number)
|
||||
])
|
||||
.with_key(vec![0]),
|
||||
),
|
||||
),
|
||||
mfp: MapFilterProject::new(2)
|
||||
.map(vec![ScalarExpr::Column(1), ScalarExpr::Column(0)])
|
||||
.map(vec![
|
||||
ScalarExpr::Column(1),
|
||||
ScalarExpr::Column(2),
|
||||
ScalarExpr::Column(0),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![2, 3])
|
||||
.project(vec![3, 4])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
@@ -446,7 +856,7 @@ mod test {
|
||||
distinct: false,
|
||||
};
|
||||
let expected = TypedPlan {
|
||||
typ: RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), true)]),
|
||||
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
|
||||
plan: Plan::Mfp {
|
||||
input: Box::new(
|
||||
Plan::Reduce {
|
||||
@@ -478,14 +888,14 @@ mod test {
|
||||
}),
|
||||
}
|
||||
.with_types(RelationType::new(vec![ColumnType::new(
|
||||
CDT::uint32_datatype(),
|
||||
CDT::uint64_datatype(),
|
||||
true,
|
||||
)])),
|
||||
),
|
||||
mfp: MapFilterProject::new(1)
|
||||
.map(vec![ScalarExpr::Column(0)])
|
||||
.map(vec![ScalarExpr::Column(0), ScalarExpr::Column(1)])
|
||||
.unwrap()
|
||||
.project(vec![1])
|
||||
.project(vec![2])
|
||||
.unwrap(),
|
||||
},
|
||||
};
|
||||
|
||||
@@ -71,7 +71,7 @@ impl TypedExpr {
),
})?;
let arg_len = f.arguments.len();
let arg_exprs: Vec<TypedExpr> = f
let arg_typed_exprs: Vec<TypedExpr> = f
.arguments
.iter()
.map(|arg| match &arg.arg_type {
@@ -83,7 +83,8 @@ impl TypedExpr {
.try_collect()?;

// literal's type is determined by the function and type of other args
let (arg_exprs, arg_types): (Vec<_>, Vec<_>) = arg_exprs
let (arg_exprs, arg_types): (Vec<_>, Vec<_>) = arg_typed_exprs
.clone()
.into_iter()
.map(
|TypedExpr {
@@ -174,7 +175,9 @@ impl TypedExpr {
};
expr.optimize();
Ok(TypedExpr::new(expr, ret_type))
} else if let Ok(func) = UnmaterializableFunc::from_str(fn_name) {
} else if let Ok(func) =
UnmaterializableFunc::from_str_args(fn_name, arg_typed_exprs)
{
let ret_type = ColumnType::new_nullable(func.signature().output.clone());
Ok(TypedExpr::new(
ScalarExpr::CallUnmaterializable(func),
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use itertools::Itertools;
|
||||
use snafu::OptionExt;
|
||||
use substrait_proto::proto::expression::MaskExpression;
|
||||
@@ -22,8 +24,8 @@ use substrait_proto::proto::{plan_rel, Plan as SubPlan, Rel};
|
||||
use crate::adapter::error::{
|
||||
Error, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu, UnexpectedSnafu,
|
||||
};
|
||||
use crate::expr::{MapFilterProject, TypedExpr};
|
||||
use crate::plan::{Plan, TypedPlan};
|
||||
use crate::expr::{MapFilterProject, ScalarExpr, TypedExpr, UnaryFunc};
|
||||
use crate::plan::{KeyValPlan, Plan, ReducePlan, TypedPlan};
|
||||
use crate::repr::{self, RelationType};
|
||||
use crate::transform::{substrait_proto, FlownodeContext, FunctionExtensions};
|
||||
|
||||
@@ -75,6 +77,7 @@ impl TypedPlan {
|
||||
} else {
|
||||
return not_impl_err!("Projection without an input is not supported");
|
||||
};
|
||||
|
||||
let mut exprs: Vec<TypedExpr> = vec![];
|
||||
for e in &p.expressions {
|
||||
let expr = TypedExpr::from_substrait_rex(e, &input.typ, extensions)?;
|
||||
@@ -97,6 +100,127 @@ impl TypedPlan {
|
||||
};
|
||||
Ok(TypedPlan { typ, plan })
|
||||
} else {
|
||||
/// if reduce_plan contains the special function like tumble floor/ceiling, add them to the proj_exprs
|
||||
fn rewrite_projection_after_reduce(
|
||||
key_val_plan: KeyValPlan,
|
||||
_reduce_plan: ReducePlan,
|
||||
reduce_output_type: &RelationType,
|
||||
proj_exprs: &mut Vec<TypedExpr>,
|
||||
) -> Result<(), Error> {
|
||||
// TODO: get keys correctly
|
||||
let key_exprs = key_val_plan
|
||||
.key_plan
|
||||
.projection
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|i| {
|
||||
if i < key_val_plan.key_plan.input_arity {
|
||||
ScalarExpr::Column(i)
|
||||
} else {
|
||||
key_val_plan.key_plan.expressions
|
||||
[i - key_val_plan.key_plan.input_arity]
|
||||
.clone()
|
||||
}
|
||||
})
|
||||
.collect_vec();
|
||||
let mut shift_offset = 0;
|
||||
let special_keys = key_exprs
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter(|(_idx, p)| {
|
||||
if matches!(
|
||||
p,
|
||||
ScalarExpr::CallUnary {
|
||||
func: UnaryFunc::TumbleWindowFloor { .. },
|
||||
..
|
||||
} | ScalarExpr::CallUnary {
|
||||
func: UnaryFunc::TumbleWindowCeiling { .. },
|
||||
..
|
||||
}
|
||||
) {
|
||||
if matches!(
|
||||
p,
|
||||
ScalarExpr::CallUnary {
|
||||
func: UnaryFunc::TumbleWindowFloor { .. },
|
||||
..
|
||||
}
|
||||
) {
|
||||
shift_offset += 1;
|
||||
}
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.collect_vec();
|
||||
let spec_key_arity = special_keys.len();
|
||||
if spec_key_arity == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
{
|
||||
// shift proj_exprs to the right by spec_key_arity
|
||||
let max_used_col_in_proj = proj_exprs
|
||||
.iter()
|
||||
.map(|expr| {
|
||||
expr.expr
|
||||
.get_all_ref_columns()
|
||||
.into_iter()
|
||||
.max()
|
||||
.unwrap_or_default()
|
||||
})
|
||||
.max()
|
||||
.unwrap_or_default();
|
||||
|
||||
let shuffle = (0..=max_used_col_in_proj)
|
||||
.map(|col| (col, col + shift_offset))
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
for proj_expr in proj_exprs.iter_mut() {
|
||||
proj_expr.expr.permute_map(&shuffle)?;
|
||||
} // add key to the end
|
||||
for (key_idx, _key_expr) in special_keys {
|
||||
// here we assume the output type of reduce operator is just first keys columns, then append value columns
|
||||
proj_exprs.push(
|
||||
ScalarExpr::Column(key_idx).with_type(
|
||||
reduce_output_type.column_types[key_idx].clone(),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
match input.plan.clone() {
|
||||
Plan::Reduce {
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
..
|
||||
} => {
|
||||
rewrite_projection_after_reduce(
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
&input.typ,
|
||||
&mut exprs,
|
||||
)?;
|
||||
}
|
||||
Plan::Mfp { input, mfp: _ } => {
|
||||
if let Plan::Reduce {
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
..
|
||||
} = input.plan
|
||||
{
|
||||
rewrite_projection_after_reduce(
|
||||
key_val_plan,
|
||||
reduce_plan,
|
||||
&input.typ,
|
||||
&mut exprs,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
input.projection(exprs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,6 +31,7 @@ use crate::repr::{value_to_internal_ts, Diff, DiffRow, Duration, KeyValDiffRow,
|
||||
pub type Batch = BTreeMap<Row, SmallVec<[DiffRow; 2]>>;
|
||||
|
||||
/// A spine of batches, arranged by timestamp
|
||||
/// TODO(discord9): consider internally index by key, value, and timestamp for faster lookup
|
||||
pub type Spine = BTreeMap<Timestamp, Batch>;
|
||||
|
||||
/// Determine when should a key expire according to it's event timestamp in key.
|
||||
@@ -51,6 +52,17 @@ pub struct KeyExpiryManager {
|
||||
}
|
||||
|
||||
impl KeyExpiryManager {
|
||||
pub fn new(
|
||||
key_expiration_duration: Option<Duration>,
|
||||
event_timestamp_from_row: Option<ScalarExpr>,
|
||||
) -> Self {
|
||||
Self {
|
||||
event_ts_to_key: Default::default(),
|
||||
key_expiration_duration,
|
||||
event_timestamp_from_row,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract event timestamp from key row.
|
||||
///
|
||||
/// If no expire state is set, return None.
|
||||
@@ -177,6 +189,10 @@ impl Arrangement {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_expire_state(&mut self, expire_state: KeyExpiryManager) {
|
||||
self.expire_state = Some(expire_state);
|
||||
}
|
||||
|
||||
/// Apply updates into spine, with no respect of whether the updates are in futures, past, or now.
|
||||
///
|
||||
/// Return the maximum expire time (already expire by how much time) of all updates if any keys is already expired.
|
||||
|
||||
@@ -31,16 +31,19 @@ use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{InvalidRegionRequestSnafu, InvokeRegionServerSnafu, Result};
|
||||
|
||||
pub struct StandaloneDatanodeManager(pub RegionServer);
|
||||
pub struct StandaloneDatanodeManager {
|
||||
pub region_server: RegionServer,
|
||||
pub flow_server: FlownodeRef,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NodeManager for StandaloneDatanodeManager {
|
||||
async fn datanode(&self, _datanode: &Peer) -> DatanodeRef {
|
||||
RegionInvoker::arc(self.0.clone())
|
||||
RegionInvoker::arc(self.region_server.clone())
|
||||
}
|
||||
|
||||
async fn flownode(&self, _node: &Peer) -> FlownodeRef {
|
||||
unimplemented!()
|
||||
self.flow_server.clone()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,8 +24,6 @@ pub struct DatanodeOptions {
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DatanodeClientOptions {
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub timeout: Duration,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub connect_timeout: Duration,
|
||||
pub tcp_nodelay: bool,
|
||||
@@ -34,7 +32,6 @@ pub struct DatanodeClientOptions {
|
||||
impl Default for DatanodeClientOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
timeout: Duration::from_secs(channel_manager::DEFAULT_GRPC_REQUEST_TIMEOUT_SECS),
|
||||
connect_timeout: Duration::from_secs(
|
||||
channel_manager::DEFAULT_GRPC_CONNECT_TIMEOUT_SECS,
|
||||
),
|
||||
|
||||
@@ -18,15 +18,15 @@ use async_trait::async_trait;
|
||||
use crate::error::Result;
|
||||
use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
|
||||
use crate::metasrv::Context;
|
||||
use crate::pubsub::{Message, PublishRef};
|
||||
use crate::pubsub::{Message, PublisherRef};
|
||||
|
||||
pub struct PublishHeartbeatHandler {
|
||||
publish: PublishRef,
|
||||
publisher: PublisherRef,
|
||||
}
|
||||
|
||||
impl PublishHeartbeatHandler {
|
||||
pub fn new(publish: PublishRef) -> PublishHeartbeatHandler {
|
||||
PublishHeartbeatHandler { publish }
|
||||
pub fn new(publisher: PublisherRef) -> PublishHeartbeatHandler {
|
||||
PublishHeartbeatHandler { publisher }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ impl HeartbeatHandler for PublishHeartbeatHandler {
|
||||
_: &mut HeartbeatAccumulator,
|
||||
) -> Result<HandleControl> {
|
||||
let msg = Message::Heartbeat(Box::new(req.clone()));
|
||||
self.publish.send_msg(msg).await;
|
||||
self.publisher.publish(msg).await;
|
||||
|
||||
Ok(HandleControl::Continue)
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ use crate::handler::HeartbeatHandlerGroup;
|
||||
use crate::lease::lookup_alive_datanode_peer;
|
||||
use crate::lock::DistLockRef;
|
||||
use crate::procedure::region_migration::manager::RegionMigrationManagerRef;
|
||||
use crate::pubsub::{PublishRef, SubscribeManagerRef};
|
||||
use crate::pubsub::{PublisherRef, SubscriptionManagerRef};
|
||||
use crate::selector::{Selector, SelectorType};
|
||||
use crate::service::mailbox::MailboxRef;
|
||||
use crate::service::store::cached_kv::LeaderCachedKvBackend;
|
||||
@@ -256,7 +256,7 @@ pub type ElectionRef = Arc<dyn Election<Leader = LeaderValue>>;
|
||||
pub struct MetaStateHandler {
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
subscribe_manager: Option<SubscribeManagerRef>,
|
||||
subscribe_manager: Option<SubscriptionManagerRef>,
|
||||
greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
|
||||
leader_cached_kv_backend: Arc<LeaderCachedKvBackend>,
|
||||
state: StateRef,
|
||||
@@ -295,7 +295,7 @@ impl MetaStateHandler {
|
||||
|
||||
if let Some(sub_manager) = self.subscribe_manager.clone() {
|
||||
info!("Leader changed, un_subscribe all");
|
||||
if let Err(e) = sub_manager.un_subscribe_all() {
|
||||
if let Err(e) = sub_manager.unsubscribe_all() {
|
||||
error!("Failed to un_subscribe all, error: {}", e);
|
||||
}
|
||||
}
|
||||
@@ -351,7 +351,7 @@ impl Metasrv {
|
||||
let procedure_manager = self.procedure_manager.clone();
|
||||
let in_memory = self.in_memory.clone();
|
||||
let leader_cached_kv_backend = self.leader_cached_kv_backend.clone();
|
||||
let subscribe_manager = self.subscribe_manager();
|
||||
let subscribe_manager = self.subscription_manager();
|
||||
let mut rx = election.subscribe_leader_change();
|
||||
let greptimedb_telemetry_task = self.greptimedb_telemetry_task.clone();
|
||||
greptimedb_telemetry_task
|
||||
@@ -540,12 +540,12 @@ impl Metasrv {
|
||||
&self.region_migration_manager
|
||||
}
|
||||
|
||||
pub fn publish(&self) -> Option<PublishRef> {
|
||||
self.plugins.get::<PublishRef>()
|
||||
pub fn publish(&self) -> Option<PublisherRef> {
|
||||
self.plugins.get::<PublisherRef>()
|
||||
}
|
||||
|
||||
pub fn subscribe_manager(&self) -> Option<SubscribeManagerRef> {
|
||||
self.plugins.get::<SubscribeManagerRef>()
|
||||
pub fn subscription_manager(&self) -> Option<SubscriptionManagerRef> {
|
||||
self.plugins.get::<SubscriptionManagerRef>()
|
||||
}
|
||||
|
||||
pub fn plugins(&self) -> &Plugins {
|
||||
|
||||
@@ -66,7 +66,7 @@ use crate::metasrv::{
|
||||
use crate::procedure::region_failover::RegionFailoverManager;
|
||||
use crate::procedure::region_migration::manager::RegionMigrationManager;
|
||||
use crate::procedure::region_migration::DefaultContextFactory;
|
||||
use crate::pubsub::PublishRef;
|
||||
use crate::pubsub::PublisherRef;
|
||||
use crate::selector::lease_based::LeaseBasedSelector;
|
||||
use crate::service::mailbox::MailboxRef;
|
||||
use crate::service::store::cached_kv::LeaderCachedKvBackend;
|
||||
@@ -320,7 +320,7 @@ impl MetasrvBuilder {
|
||||
|
||||
let publish_heartbeat_handler = plugins
|
||||
.clone()
|
||||
.and_then(|plugins| plugins.get::<PublishRef>())
|
||||
.and_then(|plugins| plugins.get::<PublisherRef>())
|
||||
.map(|publish| PublishHeartbeatHandler::new(publish.clone()));
|
||||
|
||||
let region_lease_handler = RegionLeaseHandler::new(
|
||||
|
||||
@@ -20,10 +20,10 @@ mod subscriber;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub use publish::{DefaultPublish, Publish, PublishRef};
|
||||
pub use publish::{DefaultPublisher, Publisher, PublisherRef};
|
||||
pub use subscribe_manager::{
|
||||
AddSubRequest, DefaultSubscribeManager, SubscribeManager, SubscribeManagerRef, SubscribeQuery,
|
||||
UnSubRequest,
|
||||
DefaultSubscribeManager, SubscribeRequest, SubscriptionManager, SubscriptionManagerRef,
|
||||
SubscriptionQuery, UnsubscribeRequest,
|
||||
};
|
||||
pub use subscriber::{Subscriber, SubscriberRef, Transport};
|
||||
|
||||
|
||||
@@ -18,53 +18,53 @@ use std::sync::Arc;
|
||||
|
||||
use common_telemetry::error;
|
||||
|
||||
use crate::pubsub::{Message, SubscribeManager, Transport, UnSubRequest};
|
||||
use crate::pubsub::{Message, SubscriptionManager, Transport, UnsubscribeRequest};
|
||||
|
||||
/// This trait provides a `send_msg` method that can be used by other modules
|
||||
/// This trait provides a `publish` method that can be used by other modules
|
||||
/// of meta to publish [Message].
|
||||
#[async_trait::async_trait]
|
||||
pub trait Publish: Send + Sync {
|
||||
async fn send_msg(&self, message: Message);
|
||||
pub trait Publisher: Send + Sync {
|
||||
async fn publish(&self, message: Message);
|
||||
}
|
||||
|
||||
pub type PublishRef = Arc<dyn Publish>;
|
||||
pub type PublisherRef = Arc<dyn Publisher>;
|
||||
|
||||
/// The default implementation of [Publish]
|
||||
pub struct DefaultPublish<M, T> {
|
||||
subscribe_manager: Arc<M>,
|
||||
/// The default implementation of [Publisher]
|
||||
pub struct DefaultPublisher<M, T> {
|
||||
subscription_manager: Arc<M>,
|
||||
_transport: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<M, T> DefaultPublish<M, T> {
|
||||
pub fn new(subscribe_manager: Arc<M>) -> Self {
|
||||
impl<M, T> DefaultPublisher<M, T> {
|
||||
pub fn new(subscription_manager: Arc<M>) -> Self {
|
||||
Self {
|
||||
subscribe_manager,
|
||||
subscription_manager,
|
||||
_transport: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<M, T> Publish for DefaultPublish<M, T>
|
||||
impl<M, T> Publisher for DefaultPublisher<M, T>
|
||||
where
|
||||
M: SubscribeManager<T>,
|
||||
M: SubscriptionManager<T>,
|
||||
T: Transport + Debug,
|
||||
{
|
||||
async fn send_msg(&self, message: Message) {
|
||||
let sub_list = self
|
||||
.subscribe_manager
|
||||
async fn publish(&self, message: Message) {
|
||||
let subscribers = self
|
||||
.subscription_manager
|
||||
.subscribers_by_topic(&message.topic());
|
||||
|
||||
for sub in sub_list {
|
||||
if sub.transport_msg(message.clone()).await.is_err() {
|
||||
for subscriber in subscribers {
|
||||
if subscriber.transport_msg(message.clone()).await.is_err() {
|
||||
// If an error occurs, we consider the subscriber offline,
|
||||
// so un_subscribe here.
|
||||
let req = UnSubRequest {
|
||||
subscriber_id: sub.id(),
|
||||
let req = UnsubscribeRequest {
|
||||
subscriber_id: subscriber.id(),
|
||||
};
|
||||
|
||||
if let Err(e) = self.subscribe_manager.un_subscribe(req.clone()) {
|
||||
error!(e; "failed to un_subscribe, req: {:?}", req);
|
||||
if let Err(e) = self.subscription_manager.unsubscribe(req.clone()) {
|
||||
error!(e; "failed to unsubscribe, req: {:?}", req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,94 +21,92 @@ use tokio::sync::mpsc::Sender;
|
||||
use crate::error::Result;
|
||||
use crate::pubsub::{Message, Subscriber, SubscriberRef, Topic, Transport};
|
||||
|
||||
pub trait SubscribeQuery<T>: Send + Sync {
|
||||
pub trait SubscriptionQuery<T>: Send + Sync {
|
||||
fn subscribers_by_topic(&self, topic: &Topic) -> Vec<SubscriberRef<T>>;
|
||||
}
|
||||
|
||||
pub trait SubscribeManager<T>: SubscribeQuery<T> {
|
||||
fn subscribe(&self, req: AddSubRequest<T>) -> Result<()>;
|
||||
pub trait SubscriptionManager<T>: SubscriptionQuery<T> {
|
||||
fn subscribe(&self, req: SubscribeRequest<T>) -> Result<()>;
|
||||
|
||||
fn un_subscribe(&self, req: UnSubRequest) -> Result<()>;
|
||||
fn unsubscribe(&self, req: UnsubscribeRequest) -> Result<()>;
|
||||
|
||||
fn un_subscribe_all(&self) -> Result<()>;
|
||||
fn unsubscribe_all(&self) -> Result<()>;
|
||||
}
|
||||
|
||||
pub type SubscribeManagerRef = Arc<dyn SubscribeManager<Sender<Message>>>;
|
||||
pub type SubscriptionManagerRef = Arc<dyn SubscriptionManager<Sender<Message>>>;
|
||||
|
||||
pub struct AddSubRequest<T> {
|
||||
pub topic_list: Vec<Topic>,
|
||||
pub struct SubscribeRequest<T> {
|
||||
pub topics: Vec<Topic>,
|
||||
pub subscriber: Subscriber<T>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UnSubRequest {
|
||||
pub struct UnsubscribeRequest {
|
||||
pub subscriber_id: u32,
|
||||
}
|
||||
|
||||
pub struct DefaultSubscribeManager<T> {
|
||||
topic2sub: DashMap<Topic, Vec<Arc<Subscriber<T>>>>,
|
||||
topic_to_subscribers: DashMap<Topic, Vec<Arc<Subscriber<T>>>>,
|
||||
}
|
||||
|
||||
impl<T> Default for DefaultSubscribeManager<T> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
topic2sub: DashMap::new(),
|
||||
topic_to_subscribers: DashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> SubscribeQuery<T> for DefaultSubscribeManager<T>
|
||||
impl<T> SubscriptionQuery<T> for DefaultSubscribeManager<T>
|
||||
where
|
||||
T: Transport,
|
||||
{
|
||||
fn subscribers_by_topic(&self, topic: &Topic) -> Vec<SubscriberRef<T>> {
|
||||
self.topic2sub
|
||||
self.topic_to_subscribers
|
||||
.get(topic)
|
||||
.map(|list_ref| list_ref.clone())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> SubscribeManager<T> for DefaultSubscribeManager<T>
|
||||
impl<T> SubscriptionManager<T> for DefaultSubscribeManager<T>
|
||||
where
|
||||
T: Transport,
|
||||
{
|
||||
fn subscribe(&self, req: AddSubRequest<T>) -> Result<()> {
|
||||
let AddSubRequest {
|
||||
topic_list,
|
||||
subscriber,
|
||||
} = req;
|
||||
fn subscribe(&self, req: SubscribeRequest<T>) -> Result<()> {
|
||||
let SubscribeRequest { topics, subscriber } = req;
|
||||
|
||||
info!(
|
||||
"Add a subscription, subscriber_id: {}, subscriber_name: {}, topic list: {:?}",
|
||||
"Add a subscriber, subscriber_id: {}, subscriber_name: {}, topics: {:?}",
|
||||
subscriber.id(),
|
||||
subscriber.name(),
|
||||
topic_list
|
||||
topics
|
||||
);
|
||||
|
||||
let subscriber = Arc::new(subscriber);
|
||||
|
||||
for topic in topic_list {
|
||||
let mut entry = self.topic2sub.entry(topic).or_default();
|
||||
for topic in topics {
|
||||
let mut entry = self.topic_to_subscribers.entry(topic).or_default();
|
||||
entry.push(subscriber.clone());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn un_subscribe(&self, req: UnSubRequest) -> Result<()> {
|
||||
let UnSubRequest { subscriber_id } = req;
|
||||
fn unsubscribe(&self, req: UnsubscribeRequest) -> Result<()> {
|
||||
let UnsubscribeRequest { subscriber_id } = req;
|
||||
|
||||
info!("Add a un_subscription, subscriber_id: {}", subscriber_id);
|
||||
info!("Remove a subscriber, subscriber_id: {}", subscriber_id);
|
||||
|
||||
for mut sub_list in self.topic2sub.iter_mut() {
|
||||
sub_list.retain(|subscriber| subscriber.id() != subscriber_id)
|
||||
for mut subscribers in self.topic_to_subscribers.iter_mut() {
|
||||
subscribers.retain(|subscriber| subscriber.id() != subscriber_id)
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn un_subscribe_all(&self) -> Result<()> {
|
||||
self.topic2sub.clear();
|
||||
fn unsubscribe_all(&self) -> Result<()> {
|
||||
self.topic_to_subscribers.clear();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -19,8 +19,8 @@ use tokio::sync::mpsc::{Receiver, Sender};
|
||||
|
||||
use super::DefaultSubscribeManager;
|
||||
use crate::pubsub::{
|
||||
AddSubRequest, DefaultPublish, Message, Publish, SubscribeManager, SubscribeQuery, Subscriber,
|
||||
Topic, UnSubRequest,
|
||||
DefaultPublisher, Message, Publisher, SubscribeRequest, Subscriber, SubscriptionManager,
|
||||
SubscriptionQuery, Topic, UnsubscribeRequest,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
@@ -28,15 +28,15 @@ async fn test_pubsub() {
|
||||
let manager = Arc::new(DefaultSubscribeManager::default());
|
||||
|
||||
let (subscriber1, mut rx1) = mock_subscriber(1, "tidigong");
|
||||
let req = AddSubRequest {
|
||||
topic_list: vec![Topic::Heartbeat],
|
||||
let req = SubscribeRequest {
|
||||
topics: vec![Topic::Heartbeat],
|
||||
subscriber: subscriber1,
|
||||
};
|
||||
manager.subscribe(req).unwrap();
|
||||
|
||||
let (subscriber2, mut rx2) = mock_subscriber(2, "gcrm");
|
||||
let req = AddSubRequest {
|
||||
topic_list: vec![Topic::Heartbeat],
|
||||
let req = SubscribeRequest {
|
||||
topics: vec![Topic::Heartbeat],
|
||||
subscriber: subscriber2,
|
||||
};
|
||||
manager.subscribe(req).unwrap();
|
||||
@@ -44,10 +44,10 @@ async fn test_pubsub() {
|
||||
let manager_clone = manager.clone();
|
||||
let message_number: usize = 5;
|
||||
tokio::spawn(async move {
|
||||
let publisher: DefaultPublish<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
|
||||
DefaultPublish::new(manager_clone);
|
||||
let publisher: DefaultPublisher<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
|
||||
DefaultPublisher::new(manager_clone);
|
||||
for _ in 0..message_number {
|
||||
publisher.send_msg(mock_message()).await;
|
||||
publisher.publish(mock_message()).await;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -59,12 +59,12 @@ async fn test_pubsub() {
|
||||
}
|
||||
|
||||
manager
|
||||
.un_subscribe(UnSubRequest { subscriber_id: 1 })
|
||||
.unsubscribe(UnsubscribeRequest { subscriber_id: 1 })
|
||||
.unwrap();
|
||||
let may_msg = rx1.recv().await;
|
||||
assert!(may_msg.is_none());
|
||||
|
||||
manager.un_subscribe_all().unwrap();
|
||||
manager.unsubscribe_all().unwrap();
|
||||
let may_msg = rx2.recv().await;
|
||||
assert!(may_msg.is_none());
|
||||
}
|
||||
@@ -74,15 +74,15 @@ async fn test_subscriber_disconnect() {
|
||||
let manager = Arc::new(DefaultSubscribeManager::default());
|
||||
|
||||
let (subscriber1, rx1) = mock_subscriber(1, "tidigong");
|
||||
let req = AddSubRequest {
|
||||
topic_list: vec![Topic::Heartbeat],
|
||||
let req = SubscribeRequest {
|
||||
topics: vec![Topic::Heartbeat],
|
||||
subscriber: subscriber1,
|
||||
};
|
||||
manager.subscribe(req).unwrap();
|
||||
|
||||
let (subscriber2, rx2) = mock_subscriber(2, "gcrm");
|
||||
let req = AddSubRequest {
|
||||
topic_list: vec![Topic::Heartbeat],
|
||||
let req = SubscribeRequest {
|
||||
topics: vec![Topic::Heartbeat],
|
||||
subscriber: subscriber2,
|
||||
};
|
||||
manager.subscribe(req).unwrap();
|
||||
@@ -90,10 +90,10 @@ async fn test_subscriber_disconnect() {
|
||||
let manager_clone = manager.clone();
|
||||
let message_number: usize = 5;
|
||||
let join = tokio::spawn(async move {
|
||||
let publisher: DefaultPublish<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
|
||||
DefaultPublish::new(manager_clone);
|
||||
let publisher: DefaultPublisher<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
|
||||
DefaultPublisher::new(manager_clone);
|
||||
for _ in 0..message_number {
|
||||
publisher.send_msg(mock_message()).await;
|
||||
publisher.publish(mock_message()).await;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -118,8 +118,8 @@ fn test_sub_manager() {
|
||||
let manager = DefaultSubscribeManager::default();
|
||||
|
||||
let subscriber = mock_subscriber(1, "tidigong").0;
|
||||
let req = AddSubRequest {
|
||||
topic_list: vec![Topic::Heartbeat],
|
||||
let req = SubscribeRequest {
|
||||
topics: vec![Topic::Heartbeat],
|
||||
subscriber,
|
||||
};
|
||||
manager.subscribe(req).unwrap();
|
||||
@@ -127,21 +127,21 @@ fn test_sub_manager() {
|
||||
assert_eq!(1, ret.len());
|
||||
|
||||
let subscriber = mock_subscriber(2, "gcrm").0;
|
||||
let req = AddSubRequest {
|
||||
topic_list: vec![Topic::Heartbeat],
|
||||
let req = SubscribeRequest {
|
||||
topics: vec![Topic::Heartbeat],
|
||||
subscriber,
|
||||
};
|
||||
manager.subscribe(req).unwrap();
|
||||
let ret = manager.subscribers_by_topic(&Topic::Heartbeat);
|
||||
assert_eq!(2, ret.len());
|
||||
|
||||
let req = UnSubRequest { subscriber_id: 1 };
|
||||
manager.un_subscribe(req).unwrap();
|
||||
let req = UnsubscribeRequest { subscriber_id: 1 };
|
||||
manager.unsubscribe(req).unwrap();
|
||||
let ret = manager.subscribers_by_topic(&Topic::Heartbeat);
|
||||
assert_eq!(1, ret.len());
|
||||
|
||||
let req = UnSubRequest { subscriber_id: 2 };
|
||||
manager.un_subscribe(req).unwrap();
|
||||
let req = UnsubscribeRequest { subscriber_id: 2 };
|
||||
manager.unsubscribe(req).unwrap();
|
||||
let ret = manager.subscribers_by_topic(&Topic::Heartbeat);
|
||||
assert_eq!(0, ret.len());
|
||||
}
|
||||
|
||||
@@ -396,7 +396,7 @@ impl TwcsCompactionTask {
|
||||
compacted_inputs.extend(output.inputs.iter().map(FileHandle::meta));
|
||||
|
||||
info!(
|
||||
"Compaction region {} output [{}]-> {}",
|
||||
"Compaction region {}. Input [{}] -> output {}",
|
||||
self.region_id,
|
||||
output
|
||||
.inputs
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::{error, info};
|
||||
use common_telemetry::{debug, error, info};
|
||||
use smallvec::SmallVec;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionId;
|
||||
@@ -118,7 +118,7 @@ impl WriteBufferManager for WriteBufferManagerImpl {
|
||||
fn should_flush_engine(&self) -> bool {
|
||||
let mutable_memtable_memory_usage = self.memory_active.load(Ordering::Relaxed);
|
||||
if mutable_memtable_memory_usage > self.mutable_limit {
|
||||
info!(
|
||||
debug!(
|
||||
"Engine should flush (over mutable limit), mutable_usage: {}, memory_usage: {}, mutable_limit: {}, global_limit: {}",
|
||||
mutable_memtable_memory_usage, self.memory_usage(), self.mutable_limit, self.global_write_buffer_size,
|
||||
);
|
||||
@@ -132,7 +132,7 @@ impl WriteBufferManager for WriteBufferManagerImpl {
|
||||
if memory_usage >= self.global_write_buffer_size
|
||||
&& mutable_memtable_memory_usage >= self.global_write_buffer_size / 2
|
||||
{
|
||||
info!(
|
||||
debug!(
|
||||
"Engine should flush (over total limit), memory_usage: {}, global_write_buffer_size: {}, \
|
||||
mutable_usage: {}.",
|
||||
memory_usage,
|
||||
|
||||
@@ -85,8 +85,8 @@ impl<S> RegionWorkerLoop<S> {
|
||||
let mut max_mem_region = None;
|
||||
|
||||
for region in ®ions {
|
||||
if self.flush_scheduler.is_flush_requested(region.region_id) {
|
||||
// Already flushing.
|
||||
if self.flush_scheduler.is_flush_requested(region.region_id) || !region.is_writable() {
|
||||
// Already flushing or not writable.
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -134,8 +134,8 @@ impl<S> RegionWorkerLoop<S> {
|
||||
let min_last_flush_time = now - self.config.auto_flush_interval.as_millis() as i64;
|
||||
|
||||
for region in ®ions {
|
||||
if self.flush_scheduler.is_flush_requested(region.region_id) {
|
||||
// Already flushing.
|
||||
if self.flush_scheduler.is_flush_requested(region.region_id) || !region.is_writable() {
|
||||
// Already flushing or not writable.
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -321,6 +321,7 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<
|
||||
connection,
|
||||
with,
|
||||
table_name,
|
||||
limit,
|
||||
..
|
||||
} = match stmt {
|
||||
CopyTable::To(arg) => arg,
|
||||
@@ -347,6 +348,7 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<
|
||||
pattern,
|
||||
direction,
|
||||
timestamp_range,
|
||||
limit,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,9 @@ use object_store::Entry;
|
||||
use regex::Regex;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
|
||||
use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest};
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{CatalogSnafu, InvalidCopyDatabasePathSnafu};
|
||||
@@ -65,11 +67,29 @@ impl StatementExecutor {
|
||||
|
||||
let mut exported_rows = 0;
|
||||
for table_name in table_names {
|
||||
// TODO(hl): also handles tables with metric engine.
|
||||
// TODO(hl): remove this hardcode once we've removed numbers table.
|
||||
if table_name == "numbers" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let table = self
|
||||
.get_table(&TableReference {
|
||||
catalog: &req.catalog_name,
|
||||
schema: &req.schema_name,
|
||||
table: &table_name,
|
||||
})
|
||||
.await?;
|
||||
// Ignores physical tables of metric engine.
|
||||
if table.table_info().meta.engine == METRIC_ENGINE_NAME
|
||||
&& !table
|
||||
.table_info()
|
||||
.meta
|
||||
.options
|
||||
.extra_options
|
||||
.contains_key(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let mut table_file = req.location.clone();
|
||||
table_file.push_str(&table_name);
|
||||
table_file.push_str(suffix);
|
||||
@@ -90,6 +110,7 @@ impl StatementExecutor {
|
||||
pattern: None,
|
||||
direction: CopyDirection::Export,
|
||||
timestamp_range: req.time_range,
|
||||
limit: None,
|
||||
},
|
||||
ctx.clone(),
|
||||
)
|
||||
@@ -155,6 +176,7 @@ impl StatementExecutor {
|
||||
pattern: None,
|
||||
direction: CopyDirection::Import,
|
||||
timestamp_range: None,
|
||||
limit: None,
|
||||
};
|
||||
debug!("Copy table, arg: {:?}", req);
|
||||
match self.copy_table_from(req, ctx.clone()).await {
|
||||
|
||||
@@ -52,8 +52,6 @@ use crate::statement::StatementExecutor;
|
||||
|
||||
const DEFAULT_BATCH_SIZE: usize = 8192;
|
||||
const DEFAULT_READ_BUFFER: usize = 256 * 1024;
|
||||
const MAX_INSERT_ROWS: &str = "max_insert_rows";
|
||||
const DEFAULT_MAX_INSERT_ROWS: usize = 1000;
|
||||
|
||||
enum FileMetadata {
|
||||
Parquet {
|
||||
@@ -379,11 +377,7 @@ impl StatementExecutor {
|
||||
|
||||
let mut rows_inserted = 0;
|
||||
let mut insert_cost = 0;
|
||||
let max_insert_rows = req
|
||||
.with
|
||||
.get(MAX_INSERT_ROWS)
|
||||
.and_then(|val| val.parse::<usize>().ok())
|
||||
.unwrap_or(DEFAULT_MAX_INSERT_ROWS);
|
||||
let max_insert_rows = req.limit.map(|n| n as usize);
|
||||
for (compat_schema, file_schema_projection, projected_table_schema, file_metadata) in files
|
||||
{
|
||||
let mut stream = self
|
||||
@@ -435,8 +429,10 @@ impl StatementExecutor {
|
||||
insert_cost += cost;
|
||||
}
|
||||
|
||||
if rows_inserted >= max_insert_rows {
|
||||
return Ok(gen_insert_output(rows_inserted, insert_cost));
|
||||
if let Some(max_insert_rows) = max_insert_rows {
|
||||
if rows_inserted >= max_insert_rows {
|
||||
return Ok(gen_insert_output(rows_inserted, insert_cost));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -342,12 +342,16 @@ impl InstantManipulateStream {
|
||||
// and the function `vectorSelectorSingle`
|
||||
pub fn manipulate(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
|
||||
let mut take_indices = vec![];
|
||||
// TODO(ruihang): maybe the input is not timestamp millisecond array
|
||||
|
||||
let ts_column = input
|
||||
.column(self.time_index)
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Execution(
|
||||
"Time index Column downcast to TimestampMillisecondArray failed".into(),
|
||||
)
|
||||
})?;
|
||||
|
||||
// field column for staleness check
|
||||
let field_column = self
|
||||
|
||||
@@ -250,12 +250,15 @@ pub struct SeriesNormalizeStream {
|
||||
|
||||
impl SeriesNormalizeStream {
|
||||
pub fn normalize(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
|
||||
// TODO(ruihang): maybe the input is not timestamp millisecond array
|
||||
let ts_column = input
|
||||
.column(self.time_index)
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Execution(
|
||||
"Time index Column downcast to TimestampMillisecondArray failed".into(),
|
||||
)
|
||||
})?;
|
||||
|
||||
// bias the timestamp column by offset
|
||||
let ts_column_biased = if self.offset == 0 {
|
||||
|
||||
@@ -433,7 +433,7 @@ impl RangeManipulateStream {
|
||||
pub fn manipulate(&self, input: RecordBatch) -> DataFusionResult<Option<RecordBatch>> {
|
||||
let mut other_columns = (0..input.columns().len()).collect::<HashSet<_>>();
|
||||
// calculate the range
|
||||
let (aligned_ts, ranges) = self.calculate_range(&input);
|
||||
let (aligned_ts, ranges) = self.calculate_range(&input)?;
|
||||
// ignore this if all ranges are empty
|
||||
if ranges.iter().all(|(_, len)| *len == 0) {
|
||||
return Ok(None);
|
||||
@@ -472,12 +472,19 @@ impl RangeManipulateStream {
|
||||
.map_err(|e| DataFusionError::ArrowError(e, None))
|
||||
}
|
||||
|
||||
fn calculate_range(&self, input: &RecordBatch) -> (ArrayRef, Vec<(u32, u32)>) {
|
||||
fn calculate_range(
|
||||
&self,
|
||||
input: &RecordBatch,
|
||||
) -> DataFusionResult<(ArrayRef, Vec<(u32, u32)>)> {
|
||||
let ts_column = input
|
||||
.column(self.time_index)
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Execution(
|
||||
"Time index Column downcast to TimestampMillisecondArray failed".into(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut aligned_ts = vec![];
|
||||
let mut ranges = vec![];
|
||||
@@ -506,7 +513,7 @@ impl RangeManipulateStream {
|
||||
|
||||
let aligned_ts_array = Arc::new(TimestampMillisecondArray::from(aligned_ts)) as _;
|
||||
|
||||
(aligned_ts_array, ranges)
|
||||
Ok((aligned_ts_array, ranges))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,8 @@ use datafusion::prelude::{Column, Expr as DfExpr, JoinType};
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use datafusion::sql::TableReference;
|
||||
use datafusion_expr::utils::conjunction;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use itertools::Itertools;
|
||||
use promql_parser::label::{MatchOp, Matcher, Matchers, METRIC_NAME};
|
||||
use promql_parser::parser::{
|
||||
@@ -910,9 +911,62 @@ impl PromPlanner {
|
||||
.resolve_table(table_ref.clone())
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
// Safety: `scan_filters` is not empty.
|
||||
let result = LogicalPlanBuilder::scan(table_ref, provider, None)
|
||||
|
||||
let is_time_index_ms = provider
|
||||
.as_any()
|
||||
.downcast_ref::<DefaultTableSource>()
|
||||
.context(UnknownTableSnafu)?
|
||||
.table_provider
|
||||
.as_any()
|
||||
.downcast_ref::<DfTableProviderAdapter>()
|
||||
.context(UnknownTableSnafu)?
|
||||
.table()
|
||||
.schema()
|
||||
.timestamp_column()
|
||||
.with_context(|| TimeIndexNotFoundSnafu {
|
||||
table: table_ref.to_quoted_string(),
|
||||
})?
|
||||
.data_type
|
||||
== ConcreteDataType::timestamp_millisecond_datatype();
|
||||
|
||||
let mut scan_plan = LogicalPlanBuilder::scan(table_ref.clone(), provider, None)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
|
||||
if !is_time_index_ms {
|
||||
// cast to ms if time_index not in Millisecond precision
|
||||
let expr: Vec<_> = self
|
||||
.ctx
|
||||
.field_columns
|
||||
.iter()
|
||||
.map(|col| DfExpr::Column(Column::new(Some(table_ref.clone()), col.clone())))
|
||||
.chain(self.create_tag_column_exprs()?)
|
||||
.chain(Some(DfExpr::Alias(Alias {
|
||||
expr: Box::new(DfExpr::Cast(Cast {
|
||||
expr: Box::new(self.create_time_index_column_expr()?),
|
||||
data_type: ArrowDataType::Timestamp(ArrowTimeUnit::Millisecond, None),
|
||||
})),
|
||||
relation: Some(table_ref.clone()),
|
||||
name: self
|
||||
.ctx
|
||||
.time_index_column
|
||||
.as_ref()
|
||||
.with_context(|| TimeIndexNotFoundSnafu {
|
||||
table: table_ref.to_quoted_string(),
|
||||
})?
|
||||
.clone(),
|
||||
})))
|
||||
.collect::<Vec<_>>();
|
||||
scan_plan = LogicalPlanBuilder::from(scan_plan)
|
||||
.project(expr)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
}
|
||||
|
||||
// Safety: `scan_filters` is not empty.
|
||||
let result = LogicalPlanBuilder::from(scan_plan)
|
||||
.filter(conjunction(filter).unwrap())
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
@@ -2972,4 +3026,99 @@ mod test {
|
||||
assert!(plan.is_err(), "query: {:?}", query);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_non_ms_precision() {
|
||||
let catalog_list = MemoryCatalogManager::with_default_setup();
|
||||
let columns = vec![
|
||||
ColumnSchema::new(
|
||||
"tag".to_string(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"timestamp".to_string(),
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new(
|
||||
"field".to_string(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
true,
|
||||
),
|
||||
];
|
||||
let schema = Arc::new(Schema::new(columns));
|
||||
let table_meta = TableMetaBuilder::default()
|
||||
.schema(schema)
|
||||
.primary_key_indices(vec![0])
|
||||
.value_indices(vec![2])
|
||||
.next_column_id(1024)
|
||||
.build()
|
||||
.unwrap();
|
||||
let table_info = TableInfoBuilder::default()
|
||||
.name("metrics".to_string())
|
||||
.meta(table_meta)
|
||||
.build()
|
||||
.unwrap();
|
||||
let table = EmptyTable::from_table_info(&table_info);
|
||||
assert!(catalog_list
|
||||
.register_table_sync(RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "metrics".to_string(),
|
||||
table_id: 1024,
|
||||
table,
|
||||
})
|
||||
.is_ok());
|
||||
|
||||
let plan = PromPlanner::stmt_to_plan(
|
||||
DfTableSourceProvider::new(catalog_list.clone(), false, QueryContext::arc().as_ref()),
|
||||
EvalStmt {
|
||||
expr: parser::parse("metrics{tag = \"1\"}").unwrap(),
|
||||
start: UNIX_EPOCH,
|
||||
end: UNIX_EPOCH
|
||||
.checked_add(Duration::from_secs(100_000))
|
||||
.unwrap(),
|
||||
interval: Duration::from_secs(5),
|
||||
lookback_delta: Duration::from_secs(1),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(plan.display_indent_schema().to_string(),
|
||||
"PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Sort: metrics.tag DESC NULLS LAST, metrics.timestamp DESC NULLS LAST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-1000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(Millisecond, None)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
|
||||
);
|
||||
let plan = PromPlanner::stmt_to_plan(
|
||||
DfTableSourceProvider::new(catalog_list.clone(), false, QueryContext::arc().as_ref()),
|
||||
EvalStmt {
|
||||
expr: parser::parse("avg_over_time(metrics{tag = \"1\"}[5s])").unwrap(),
|
||||
start: UNIX_EPOCH,
|
||||
end: UNIX_EPOCH
|
||||
.checked_add(Duration::from_secs(100_000))
|
||||
.unwrap(),
|
||||
interval: Duration::from_secs(5),
|
||||
lookback_delta: Duration::from_secs(1),
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(plan.display_indent_schema().to_string(),
|
||||
"Filter: prom_avg_over_time(timestamp_range,field) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
|
||||
\n Projection: metrics.timestamp, prom_avg_over_time(timestamp_range, field) AS prom_avg_over_time(timestamp_range,field), metrics.tag [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
|
||||
\n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[5000], time index=[timestamp], values=[\"field\"] [field:Dictionary(Int64, Float64);N, tag:Utf8, timestamp:Timestamp(Millisecond, None), timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\
|
||||
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Sort: metrics.tag DESC NULLS LAST, metrics.timestamp DESC NULLS LAST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-6000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(Millisecond, None)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1116,9 +1116,11 @@ impl RangeSelectStream {
|
||||
let ts_column_ref = ts_column
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.ok_or(DataFusionError::Execution(
|
||||
"Time index Column downcast to TimestampMillisecondArray failed".into(),
|
||||
))?;
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Execution(
|
||||
"Time index Column downcast to TimestampMillisecondArray failed".into(),
|
||||
)
|
||||
})?;
|
||||
for i in 0..self.range_exec.len() {
|
||||
let args = self.evaluate_many(&batch, &self.range_exec[i].args)?;
|
||||
// use self.modify_map record (hash, align_ts) => [row_nums]
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::time::Duration;
|
||||
use arrow_schema::DataType;
|
||||
use async_recursion::async_recursion;
|
||||
use catalog::table_source::DfTableSourceProvider;
|
||||
use chrono::Utc;
|
||||
use common_time::interval::NANOS_PER_MILLI;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Interval, Timestamp, Timezone};
|
||||
@@ -27,10 +28,13 @@ use datafusion::prelude::Column;
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
|
||||
use datafusion_common::{DFSchema, DataFusionError, Result as DFResult};
|
||||
use datafusion_expr::execution_props::ExecutionProps;
|
||||
use datafusion_expr::simplify::SimplifyContext;
|
||||
use datafusion_expr::{
|
||||
Aggregate, Analyze, Explain, Expr, ExprSchemable, Extension, LogicalPlan, LogicalPlanBuilder,
|
||||
Projection,
|
||||
};
|
||||
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use promql_parser::util::parse_duration;
|
||||
use session::context::QueryContextRef;
|
||||
@@ -108,34 +112,84 @@ fn parse_expr_to_string(args: &[Expr], i: usize) -> DFResult<String> {
|
||||
/// Parse a duraion expr:
|
||||
/// 1. duration string (e.g. `'1h'`)
|
||||
/// 2. Interval expr (e.g. `INTERVAL '1 year 3 hours 20 minutes'`)
|
||||
/// 3. An interval expr can be evaluated at the logical plan stage (e.g. `INTERVAL '2' day - INTERVAL '1' day`)
|
||||
fn parse_duration_expr(args: &[Expr], i: usize) -> DFResult<Duration> {
|
||||
let interval_to_duration = |interval: Interval| -> Duration {
|
||||
Duration::from_millis((interval.to_nanosecond() / NANOS_PER_MILLI as i128) as u64)
|
||||
};
|
||||
match args.get(i) {
|
||||
Some(Expr::Literal(ScalarValue::Utf8(Some(str)))) => {
|
||||
parse_duration(str).map_err(DataFusionError::Plan)
|
||||
}
|
||||
Some(Expr::Literal(ScalarValue::IntervalYearMonth(Some(i)))) => {
|
||||
Ok(interval_to_duration(Interval::from_i32(*i)))
|
||||
Some(expr) => {
|
||||
let ms = evaluate_expr_to_millisecond(args, i, true)?;
|
||||
if ms <= 0 {
|
||||
return Err(dispose_parse_error(Some(expr)));
|
||||
}
|
||||
Ok(Duration::from_millis(ms as u64))
|
||||
}
|
||||
Some(Expr::Literal(ScalarValue::IntervalDayTime(Some(i)))) => {
|
||||
Ok(interval_to_duration(Interval::from_i64(*i)))
|
||||
}
|
||||
Some(Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(i)))) => {
|
||||
Ok(interval_to_duration(Interval::from_i128(*i)))
|
||||
}
|
||||
other => Err(dispose_parse_error(other)),
|
||||
None => Err(dispose_parse_error(None)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate a time calculation expr, case like:
|
||||
/// 1. `INTERVAL '1' day + INTERVAL '1 year 2 hours 3 minutes'`
|
||||
/// 2. `now() - INTERVAL '1' day` (when `interval_only==false`)
|
||||
///
|
||||
/// Output a millisecond timestamp
|
||||
///
|
||||
/// if `interval_only==true`, only accept expr with all interval type (case 2 will return a error)
|
||||
fn evaluate_expr_to_millisecond(args: &[Expr], i: usize, interval_only: bool) -> DFResult<i64> {
|
||||
let Some(expr) = args.get(i) else {
|
||||
return Err(dispose_parse_error(None));
|
||||
};
|
||||
if interval_only && !interval_only_in_expr(expr) {
|
||||
return Err(dispose_parse_error(Some(expr)));
|
||||
}
|
||||
let execution_props = ExecutionProps::new().with_query_execution_start_time(Utc::now());
|
||||
let info = SimplifyContext::new(&execution_props).with_schema(Arc::new(DFSchema::empty()));
|
||||
let interval_to_ms =
|
||||
|interval: Interval| -> i64 { (interval.to_nanosecond() / NANOS_PER_MILLI as i128) as i64 };
|
||||
let simplify_expr = ExprSimplifier::new(info).simplify(expr.clone())?;
|
||||
match simplify_expr {
|
||||
Expr::Literal(ScalarValue::TimestampNanosecond(ts_nanos, _))
|
||||
| Expr::Literal(ScalarValue::DurationNanosecond(ts_nanos)) => {
|
||||
ts_nanos.map(|v| v / 1_000_000)
|
||||
}
|
||||
Expr::Literal(ScalarValue::TimestampMicrosecond(ts_micros, _))
|
||||
| Expr::Literal(ScalarValue::DurationMicrosecond(ts_micros)) => {
|
||||
ts_micros.map(|v| v / 1_000)
|
||||
}
|
||||
Expr::Literal(ScalarValue::TimestampMillisecond(ts_millis, _))
|
||||
| Expr::Literal(ScalarValue::DurationMillisecond(ts_millis)) => ts_millis,
|
||||
Expr::Literal(ScalarValue::TimestampSecond(ts_secs, _))
|
||||
| Expr::Literal(ScalarValue::DurationSecond(ts_secs)) => ts_secs.map(|v| v * 1_000),
|
||||
Expr::Literal(ScalarValue::IntervalYearMonth(interval)) => {
|
||||
interval.map(|v| interval_to_ms(Interval::from_i32(v)))
|
||||
}
|
||||
Expr::Literal(ScalarValue::IntervalDayTime(interval)) => {
|
||||
interval.map(|v| interval_to_ms(Interval::from_i64(v)))
|
||||
}
|
||||
Expr::Literal(ScalarValue::IntervalMonthDayNano(interval)) => {
|
||||
interval.map(|v| interval_to_ms(Interval::from_i128(v)))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Plan(format!(
|
||||
"{} is not a expr can be evaluate and use in range query",
|
||||
expr.display_name().unwrap_or_default()
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse the `align to` clause and return a UTC timestamp with unit of millisecond,
|
||||
/// which is used as the basis for dividing time slot during the align operation.
|
||||
/// 1. NOW: align to current execute time
|
||||
/// 2. Timestamp string: align to specific timestamp
|
||||
/// 3. leave empty (as Default Option): align to unix epoch 0 (timezone aware)
|
||||
/// 3. An expr can be evaluated at the logical plan stage (e.g. `now() - INTERVAL '1' day`)
|
||||
/// 4. leave empty (as Default Option): align to unix epoch 0 (timezone aware)
|
||||
fn parse_align_to(args: &[Expr], i: usize, timezone: Option<&Timezone>) -> DFResult<i64> {
|
||||
let s = parse_str_expr(args, i)?;
|
||||
let Ok(s) = parse_str_expr(args, i) else {
|
||||
return evaluate_expr_to_millisecond(args, i, false);
|
||||
};
|
||||
let upper = s.to_uppercase();
|
||||
match upper.as_str() {
|
||||
"NOW" => return Ok(Timestamp::current_millis().value()),
|
||||
@@ -469,6 +523,25 @@ fn have_range_in_exprs(exprs: &[Expr]) -> bool {
|
||||
})
|
||||
}
|
||||
|
||||
fn interval_only_in_expr(expr: &Expr) -> bool {
|
||||
let mut all_interval = true;
|
||||
let _ = expr.apply(&mut |expr| {
|
||||
if !matches!(
|
||||
expr,
|
||||
Expr::Literal(ScalarValue::IntervalDayTime(_))
|
||||
| Expr::Literal(ScalarValue::IntervalMonthDayNano(_))
|
||||
| Expr::Literal(ScalarValue::IntervalYearMonth(_))
|
||||
| Expr::BinaryExpr(_)
|
||||
) {
|
||||
all_interval = false;
|
||||
Ok(TreeNodeRecursion::Stop)
|
||||
} else {
|
||||
Ok(TreeNodeRecursion::Continue)
|
||||
}
|
||||
});
|
||||
all_interval
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
@@ -477,6 +550,7 @@ mod test {
|
||||
use catalog::memory::MemoryCatalogManager;
|
||||
use catalog::RegisterTableRequest;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use datafusion_expr::{BinaryExpr, Operator};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use session::context::QueryContext;
|
||||
@@ -754,8 +828,42 @@ mod test {
|
||||
parse_duration_expr(&args, 0).unwrap(),
|
||||
parse_duration("1y4w").unwrap()
|
||||
);
|
||||
// test err
|
||||
// test index err
|
||||
assert!(parse_duration_expr(&args, 10).is_err());
|
||||
// test evaluate expr
|
||||
let args = vec![Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
op: Operator::Plus,
|
||||
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
})];
|
||||
assert_eq!(
|
||||
parse_duration_expr(&args, 0).unwrap().as_millis(),
|
||||
interval_to_ms(Interval::from_year_month(20))
|
||||
);
|
||||
let args = vec![Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
op: Operator::Minus,
|
||||
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
})];
|
||||
// test zero interval error
|
||||
assert!(parse_duration_expr(&args, 0).is_err());
|
||||
// test must all be interval
|
||||
let args = vec![Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
op: Operator::Minus,
|
||||
right: Box::new(Expr::Literal(ScalarValue::Time64Microsecond(Some(0)))),
|
||||
})];
|
||||
assert!(parse_duration_expr(&args, 0).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -787,19 +895,56 @@ mod test {
|
||||
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
|
||||
"1970-01-01T00:00:00+08:00".into(),
|
||||
)))];
|
||||
assert!(parse_align_to(&args, 0, None).unwrap() == -8 * 60 * 60 * 1000);
|
||||
assert_eq!(parse_align_to(&args, 0, None).unwrap(), -8 * 60 * 60 * 1000);
|
||||
// timezone
|
||||
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
|
||||
"1970-01-01T00:00:00".into(),
|
||||
)))];
|
||||
assert!(
|
||||
assert_eq!(
|
||||
parse_align_to(
|
||||
&args,
|
||||
0,
|
||||
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
|
||||
)
|
||||
.unwrap()
|
||||
== -8 * 60 * 60 * 1000
|
||||
.unwrap(),
|
||||
-8 * 60 * 60 * 1000
|
||||
);
|
||||
// test evaluate expr
|
||||
let args = vec![Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
op: Operator::Plus,
|
||||
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
})];
|
||||
assert_eq!(
|
||||
parse_align_to(&args, 0, None).unwrap(),
|
||||
// 20 month
|
||||
20 * 30 * 24 * 60 * 60 * 1000
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_interval_only() {
|
||||
let expr = Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(Expr::Literal(ScalarValue::DurationMillisecond(Some(20)))),
|
||||
op: Operator::Minus,
|
||||
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
});
|
||||
assert!(!interval_only_in_expr(&expr));
|
||||
let expr = Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
op: Operator::Minus,
|
||||
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
|
||||
Interval::from_year_month(10).to_i32(),
|
||||
)))),
|
||||
});
|
||||
assert!(interval_only_in_expr(&expr));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ use sql::dialect::GreptimeDbDialect;
|
||||
use sql::parser::ParserContext;
|
||||
use sql::statements::create::{CreateTable, TIME_INDEX};
|
||||
use sql::statements::{self, OptionMap};
|
||||
use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column};
|
||||
use table::metadata::{TableInfoRef, TableMeta};
|
||||
use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY};
|
||||
|
||||
@@ -96,6 +97,7 @@ fn create_column_def(column_schema: &ColumnSchema, quote_style: char) -> Result<
|
||||
}
|
||||
|
||||
fn create_table_constraints(
|
||||
engine: &str,
|
||||
schema: &SchemaRef,
|
||||
table_meta: &TableMeta,
|
||||
quote_style: char,
|
||||
@@ -111,9 +113,16 @@ fn create_table_constraints(
|
||||
});
|
||||
}
|
||||
if !table_meta.primary_key_indices.is_empty() {
|
||||
let is_metric_engine = is_metric_engine(engine);
|
||||
let columns = table_meta
|
||||
.row_key_column_names()
|
||||
.map(|name| Ident::with_quote(quote_style, name))
|
||||
.flat_map(|name| {
|
||||
if is_metric_engine && is_metric_engine_internal_column(name) {
|
||||
None
|
||||
} else {
|
||||
Some(Ident::with_quote(quote_style, name))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
constraints.push(TableConstraint::Unique {
|
||||
name: None,
|
||||
@@ -131,14 +140,20 @@ pub fn create_table_stmt(table_info: &TableInfoRef, quote_style: char) -> Result
|
||||
let table_meta = &table_info.meta;
|
||||
let table_name = &table_info.name;
|
||||
let schema = &table_info.meta.schema;
|
||||
|
||||
let is_metric_engine = is_metric_engine(&table_meta.engine);
|
||||
let columns = schema
|
||||
.column_schemas()
|
||||
.iter()
|
||||
.map(|c| create_column_def(c, quote_style))
|
||||
.filter_map(|c| {
|
||||
if is_metric_engine && is_metric_engine_internal_column(&c.name) {
|
||||
None
|
||||
} else {
|
||||
Some(create_column_def(c, quote_style))
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let constraints = create_table_constraints(schema, table_meta, quote_style);
|
||||
let constraints = create_table_constraints(&table_meta.engine, schema, table_meta, quote_style);
|
||||
|
||||
Ok(CreateTable {
|
||||
if_not_exists: true,
|
||||
|
||||
@@ -308,6 +308,71 @@ mod tests {
        }
    }

    #[test]
    fn test_parse_alter_change_column_alias_type() {
        let sql_1 = "ALTER TABLE my_metric_1 MODIFY COLUMN a MediumText";
        let mut result_1 = ParserContext::create_with_dialect(
            sql_1,
            &GreptimeDbDialect {},
            ParseOptions::default(),
        )
        .unwrap();

        match result_1.remove(0) {
            Statement::Alter(alter_table) => {
                assert_eq!("my_metric_1", alter_table.table_name().0[0].value);

                let alter_operation = alter_table.alter_operation();
                assert_matches!(
                    alter_operation,
                    AlterTableOperation::ChangeColumnType { .. }
                );
                match alter_operation {
                    AlterTableOperation::ChangeColumnType {
                        column_name,
                        target_type,
                    } => {
                        assert_eq!("a", column_name.value);
                        assert_eq!(DataType::Text, *target_type);
                    }
                    _ => unreachable!(),
                }
            }
            _ => unreachable!(),
        }

        let sql_2 = "ALTER TABLE my_metric_1 MODIFY COLUMN a TIMESTAMP_US";
        let mut result_2 = ParserContext::create_with_dialect(
            sql_2,
            &GreptimeDbDialect {},
            ParseOptions::default(),
        )
        .unwrap();

        match result_2.remove(0) {
            Statement::Alter(alter_table) => {
                assert_eq!("my_metric_1", alter_table.table_name().0[0].value);

                let alter_operation = alter_table.alter_operation();
                assert_matches!(
                    alter_operation,
                    AlterTableOperation::ChangeColumnType { .. }
                );
                match alter_operation {
                    AlterTableOperation::ChangeColumnType {
                        column_name,
                        target_type,
                    } => {
                        assert_eq!("a", column_name.value);
                        assert!(matches!(target_type, DataType::Timestamp(Some(6), _)));
                    }
                    _ => unreachable!(),
                }
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_parse_alter_rename_table() {
        let sql = "ALTER TABLE test_table table_t";

@@ -56,7 +56,14 @@ impl<'a> ParserContext<'a> {
        })?;

        let req = if self.parser.parse_keyword(Keyword::TO) {
            let (with, connection, location) = self.parse_copy_parameters()?;
            let (with, connection, location, limit) = self.parse_copy_parameters()?;
            if limit.is_some() {
                return error::InvalidSqlSnafu {
                    msg: "limit is not supported",
                }
                .fail();
            }

            let argument = CopyDatabaseArgument {
                database_name,
                with: with.into(),
@@ -68,7 +75,14 @@ impl<'a> ParserContext<'a> {
            self.parser
                .expect_keyword(Keyword::FROM)
                .context(error::SyntaxSnafu)?;
            let (with, connection, location) = self.parse_copy_parameters()?;
            let (with, connection, location, limit) = self.parse_copy_parameters()?;
            if limit.is_some() {
                return error::InvalidSqlSnafu {
                    msg: "limit is not supported",
                }
                .fail();
            }

            let argument = CopyDatabaseArgument {
                database_name,
                with: with.into(),
@@ -91,28 +105,30 @@ impl<'a> ParserContext<'a> {
        let table_name = Self::canonicalize_object_name(raw_table_name);

        if self.parser.parse_keyword(Keyword::TO) {
            let (with, connection, location) = self.parse_copy_parameters()?;
            let (with, connection, location, limit) = self.parse_copy_parameters()?;
            Ok(CopyTable::To(CopyTableArgument {
                table_name,
                with: with.into(),
                connection: connection.into(),
                location,
                limit,
            }))
        } else {
            self.parser
                .expect_keyword(Keyword::FROM)
                .context(error::SyntaxSnafu)?;
            let (with, connection, location) = self.parse_copy_parameters()?;
            let (with, connection, location, limit) = self.parse_copy_parameters()?;
            Ok(CopyTable::From(CopyTableArgument {
                table_name,
                with: with.into(),
                connection: connection.into(),
                location,
                limit,
            }))
        }
    }

    fn parse_copy_parameters(&mut self) -> Result<(With, Connection, String)> {
    fn parse_copy_parameters(&mut self) -> Result<(With, Connection, String, Option<u64>)> {
        let location =
            self.parser
                .parse_literal_string()
@@ -142,7 +158,21 @@ impl<'a> ParserContext<'a> {
            .map(parse_option_string)
            .collect::<Result<Connection>>()?;

        Ok((with, connection, location))
        let limit = if self.parser.parse_keyword(Keyword::LIMIT) {
            Some(
                self.parser
                    .parse_literal_uint()
                    .with_context(|_| error::UnexpectedSnafu {
                        sql: self.sql,
                        expected: "the number of maximum rows",
                        actual: self.peek_token_as_string(),
                    })?,
            )
        } else {
            None
        };

        Ok((with, connection, location, limit))
    }
}

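A minimal, self-contained sketch (not part of the commits above) of the optional LIMIT clause handling that parse_copy_parameters gains in this hunk: if the LIMIT keyword is present it must be followed by an unsigned integer, otherwise no limit is returned. The real parser uses sqlparser's parse_keyword and parse_literal_uint; the token handling below is simplified so the example runs on its own.

// Illustrative sketch only; token names and helper are hypothetical stand-ins.
fn parse_optional_limit<'a, I>(words: &mut std::iter::Peekable<I>) -> Result<Option<u64>, String>
where
    I: Iterator<Item = &'a str>,
{
    if words
        .peek()
        .map(|w| w.eq_ignore_ascii_case("LIMIT"))
        .unwrap_or(false)
    {
        words.next(); // consume the LIMIT keyword
        let value = words
            .next()
            .ok_or_else(|| "expected the number of maximum rows".to_string())?;
        return value
            .parse::<u64>()
            .map(Some)
            .map_err(|_| format!("expected the number of maximum rows, found `{value}`"));
    }
    Ok(None)
}

fn main() {
    // e.g. COPY t FROM '/tmp/files/' LIMIT 2;
    let mut tokens = ["LIMIT", "2"].into_iter().peekable();
    assert_eq!(parse_optional_limit(&mut tokens), Ok(Some(2)));

    // No LIMIT clause: no row cap is applied.
    let mut tokens = [";"].into_iter().peekable();
    assert_eq!(parse_optional_limit(&mut tokens), Ok(None));
}
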
@@ -104,17 +104,19 @@ impl<'a> ParserContext<'a> {
        let (start, end, step, lookback) = match parser.peek_token().token {
            Token::LParen => {
                let _consume_lparen_token = parser.next_token();
                let start = Self::parse_string_or_number_or_word(parser, Token::Comma)?;
                let end = Self::parse_string_or_number_or_word(parser, Token::Comma)?;
                let delimiter_token = Self::find_next_delimiter_token(parser);
                let (step, lookback) = if Self::is_comma(&delimiter_token) {
                    let step = Self::parse_string_or_number_or_word(parser, Token::Comma)?;
                    let lookback = Self::parse_string_or_number_or_word(parser, Token::RParen).ok();
                    (step, lookback)
                let start = Self::parse_string_or_number_or_word(parser, &[Token::Comma])?.0;
                let end = Self::parse_string_or_number_or_word(parser, &[Token::Comma])?.0;

                let (step, delimiter) =
                    Self::parse_string_or_number_or_word(parser, &[Token::Comma, Token::RParen])?;
                let lookback = if delimiter == Token::Comma {
                    Self::parse_string_or_number_or_word(parser, &[Token::RParen])
                        .ok()
                        .map(|t| t.0)
                } else {
                    let step = Self::parse_string_or_number_or_word(parser, Token::RParen)?;
                    (step, None)
                    None
                };

                (start, end, step, lookback)
            }
            _ => ("0".to_string(), "0".to_string(), "5m".to_string(), None),
@@ -123,22 +125,8 @@ impl<'a> ParserContext<'a> {
        Ok(TqlParameters::new(start, end, step, lookback, query))
    }

    fn find_next_delimiter_token(parser: &mut Parser) -> Token {
        let mut n: usize = 0;
        while !(Self::is_comma(&parser.peek_nth_token(n).token)
            || Self::is_rparen(&parser.peek_nth_token(n).token))
        {
            n += 1;
        }
        parser.peek_nth_token(n).token
    }

    pub fn is_delimiter_token(token: &Token, delimiter_token: &Token) -> bool {
        match token {
            Token::Comma => Self::is_comma(delimiter_token),
            Token::RParen => Self::is_rparen(delimiter_token),
            _ => false,
        }
    pub fn comma_or_rparen(token: &Token) -> bool {
        Self::is_comma(token) || Self::is_rparen(token)
    }

    #[inline]
@@ -155,15 +143,21 @@ impl<'a> ParserContext<'a> {
        self.peek_token_as_string().eq_ignore_ascii_case(VERBOSE)
    }

    /// Try to parse and consume a string, number or word token.
    /// Return `Ok` if it's parsed and one of the given delimiter tokens is consumed.
    /// The string and matched delimiter will be returned as a tuple.
    fn parse_string_or_number_or_word(
        parser: &mut Parser,
        delimiter_token: Token,
    ) -> std::result::Result<String, TQLError> {
        delimiter_tokens: &[Token],
    ) -> std::result::Result<(String, Token), TQLError> {
        let mut tokens = vec![];

        while !Self::is_delimiter_token(&parser.peek_token().token, &delimiter_token) {
            let token = parser.next_token();
            tokens.push(token.token);
        while !delimiter_tokens.contains(&parser.peek_token().token) {
            let token = parser.next_token().token;
            if matches!(token, Token::EOF) {
                break;
            }
            tokens.push(token);
        }
        let result = match tokens.len() {
            0 => Err(ParserError::ParserError(
@@ -186,8 +180,15 @@ impl<'a> ParserContext<'a> {
            }
            _ => Self::parse_tokens(tokens),
        };
        parser.expect_token(&delimiter_token).context(ParserSnafu)?;
        result
        for token in delimiter_tokens {
            if parser.consume_token(token) {
                return result.map(|v| (v, token.clone()));
            }
        }
        Err(ParserError::ParserError(format!(
            "Delimiters not match {delimiter_tokens:?}"
        )))
        .context(ParserSnafu)
    }

    fn parse_tokens(tokens: Vec<Token>) -> std::result::Result<String, TQLError> {
@@ -733,5 +734,11 @@ mod tests {
        let result =
            ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
        assert!(result.output_msg().contains("empty TQL query"));

        // invalid token
        let sql = "tql eval (0, 0, '1s) t;;';";
        let result =
            ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
        assert!(result.output_msg().contains("Delimiters not match"));
    }
}

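A simplified, standalone sketch (not the project's code) of the delimiter handling introduced above for TQL parameters: tokens are collected until one of the allowed delimiters appears, and the matched delimiter is returned so the caller can tell whether an optional lookback argument follows the step. The Tok type below is a hypothetical stand-in for sqlparser's Token.

#[derive(Clone, Debug, PartialEq)]
enum Tok {
    Word(String),
    Comma,
    RParen,
    Eof,
}

// Collect words until one of `delimiters` is consumed; report which delimiter matched.
fn take_until(input: &mut Vec<Tok>, delimiters: &[Tok]) -> Result<(String, Tok), String> {
    let mut parts = Vec::new();
    while !input.is_empty() {
        let tok = input.remove(0);
        if delimiters.contains(&tok) {
            return Ok((parts.join(""), tok));
        }
        if tok == Tok::Eof {
            break;
        }
        if let Tok::Word(w) = tok {
            parts.push(w);
        }
    }
    Err(format!("Delimiters not match {delimiters:?}"))
}

fn main() {
    // e.g. TQL EVAL (0, 15, '5s') ...: the step is terminated by `)`, so no lookback follows.
    let mut tokens = vec![Tok::Word("'5s'".to_string()), Tok::RParen, Tok::Eof];
    let (step, delim) = take_until(&mut tokens, &[Tok::Comma, Tok::RParen]).unwrap();
    assert_eq!(step, "'5s'");
    assert_eq!(delim, Tok::RParen);
}
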
@@ -39,6 +39,10 @@ impl AlterTable {
    pub fn alter_operation(&self) -> &AlterTableOperation {
        &self.alter_operation
    }

    pub fn alter_operation_mut(&mut self) -> &mut AlterTableOperation {
        &mut self.alter_operation
    }
}

impl Display for AlterTable {

@@ -111,6 +111,7 @@ pub struct CopyTableArgument {
    pub connection: OptionMap,
    /// Copy tbl [To|From] 'location'.
    pub location: String,
    pub limit: Option<u64>,
}

#[cfg(test)]

@@ -20,6 +20,7 @@ use sqlparser::ast::{
};

use crate::error::Result;
use crate::statements::alter::AlterTableOperation;
use crate::statements::create::{CreateExternalTable, CreateTable};
use crate::statements::statement::Statement;
use crate::statements::transform::TransformRule;
@@ -51,6 +52,13 @@ impl TransformRule for TypeAliasTransformRule {
                    .iter_mut()
                    .for_each(|ColumnDef { data_type, .. }| replace_type_alias(data_type));
            }
            Statement::Alter(alter_table) => {
                if let AlterTableOperation::ChangeColumnType { target_type, .. } =
                    alter_table.alter_operation_mut()
                {
                    replace_type_alias(target_type)
                }
            }
            _ => {}
        }

@@ -39,6 +39,8 @@ pub const DATA_REGION_SUBDIR: &str = "data";

pub const METRIC_ENGINE_NAME: &str = "metric";

pub const FILE_ENGINE_NAME: &str = "file";

/// Metadata key present in the `CREATE TABLE ... WITH ()` clause. This key is
/// used to identify the table is a physical metric table. E.g.:
/// ```sql
@@ -70,3 +72,13 @@ pub const LOGICAL_TABLE_METADATA_KEY: &str = "on_physical_table";
/// HashMap key to be used in the region server's extension response.
/// Represent a list of column metadata that are added to physical table.
pub const ALTER_PHYSICAL_EXTENSION_KEY: &str = "ALTER_PHYSICAL";

/// Returns true if it's a internal column of the metric engine.
pub fn is_metric_engine_internal_column(name: &str) -> bool {
    name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME || name == DATA_SCHEMA_TSID_COLUMN_NAME
}

/// Returns true if it's metric engine
pub fn is_metric_engine(name: &str) -> bool {
    name == METRIC_ENGINE_NAME
}

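A small, self-contained sketch of how the two helpers above are intended to be combined (for example by the earlier create_table_stmt hunk) to hide the metric engine's internal columns from SHOW CREATE TABLE output. The constants are inlined here as assumptions so the snippet compiles on its own; their values match the "__table_id"/"__tsid" columns visible in the sqlness output later in this diff.

// Illustrative sketch only; constants are inlined copies, not the crate's items.
const METRIC_ENGINE_NAME: &str = "metric";
const DATA_SCHEMA_TABLE_ID_COLUMN_NAME: &str = "__table_id";
const DATA_SCHEMA_TSID_COLUMN_NAME: &str = "__tsid";

fn is_metric_engine(name: &str) -> bool {
    name == METRIC_ENGINE_NAME
}

fn is_metric_engine_internal_column(name: &str) -> bool {
    name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME || name == DATA_SCHEMA_TSID_COLUMN_NAME
}

fn main() {
    // Columns of a physical metric table; only user-facing ones should be rendered.
    let columns = ["ts", "val", "__table_id", "__tsid", "host", "job"];
    let visible: Vec<&str> = columns
        .iter()
        .copied()
        .filter(|name| !(is_metric_engine("metric") && is_metric_engine_internal_column(name)))
        .collect();
    assert_eq!(visible, ["ts", "val", "host", "job"]);
}
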
@@ -228,6 +228,7 @@ pub struct CopyTableRequest {
    pub pattern: Option<String>,
    pub direction: CopyDirection,
    pub timestamp_range: Option<TimestampRange>,
    pub limit: Option<u64>,
}

#[derive(Debug, Clone, Default)]

@@ -39,6 +39,7 @@ common-wal.workspace = true
datanode = { workspace = true }
datatypes.workspace = true
dotenv.workspace = true
flow.workspace = true
frontend = { workspace = true, features = ["testing"] }
futures.workspace = true
futures-util.workspace = true

@@ -35,6 +35,7 @@ use common_procedure::options::ProcedureConfig;
use common_procedure::ProcedureManagerRef;
use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
use datanode::datanode::DatanodeBuilder;
use flow::FlownodeBuilder;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
@@ -128,6 +129,7 @@ impl GreptimeDbStandaloneBuilder {

        let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
        table_metadata_manager.init().await.unwrap();

        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));

        let layered_cache_builder = LayeredCacheRegistryBuilder::default();
@@ -149,7 +151,19 @@ impl GreptimeDbStandaloneBuilder {
        )
        .await;

        let node_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));
        let flow_builder = FlownodeBuilder::new(
            1, // for standalone mode this value is default to one
            Default::default(),
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
        );
        let flownode = Arc::new(flow_builder.build().await);

        let node_manager = Arc::new(StandaloneDatanodeManager {
            region_server: datanode.region_server(),
            flow_server: flownode.clone(),
        });

        let table_id_sequence = Arc::new(
            SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())
@@ -204,6 +218,11 @@ impl GreptimeDbStandaloneBuilder {
            .await
            .unwrap();

        flownode
            .set_frontend_invoker(Box::new(instance.clone()))
            .await;
        let _node_handle = flownode.run_background();

        procedure_manager.start().await.unwrap();
        wal_options_allocator.start().await.unwrap();

@@ -36,8 +36,12 @@ macro_rules! sql_test {
                #[$meta]
            )*
            async fn [< $test >]() {
                common_telemetry::init_default_ut_logging();

                let store_type = tests_integration::test_util::StorageType::$service;
                if store_type.test_on() {
                    common_telemetry::info!("test {} starts, store_type: {:?}", stringify!($test), store_type);

                    let _ = $crate::sql::$test(store_type).await;
                }

@@ -427,8 +431,10 @@ pub async fn test_postgres_bytea(store_type: StorageType) {
    let (client, connection) = tokio_postgres::connect(&format!("postgres://{addr}/public"), NoTls)
        .await
        .unwrap();
    let (tx, rx) = tokio::sync::oneshot::channel();
    tokio::spawn(async move {
        connection.await.unwrap();
        tx.send(()).unwrap();
    });
    let _ = client
        .simple_query("CREATE TABLE test(b BLOB, ts TIMESTAMP TIME INDEX)")
@@ -481,6 +487,9 @@ pub async fn test_postgres_bytea(store_type: StorageType) {
    let val: Vec<u8> = row.get("b");
    assert_eq!(val, [97, 98, 99, 107, 108, 109, 42, 169, 84]);

    drop(client);
    rx.await.unwrap();

    let _ = fe_pg_server.shutdown().await;
    guard.remove_all().await;
}
@@ -492,8 +501,10 @@ pub async fn test_postgres_datestyle(store_type: StorageType) {
    .await
    .unwrap();

    let (tx, rx) = tokio::sync::oneshot::channel();
    tokio::spawn(async move {
        connection.await.unwrap();
        tx.send(()).unwrap();
    });

    let validate_datestyle = |client: Client, datestyle: &str, is_valid: bool| {
@@ -703,6 +714,9 @@ pub async fn test_postgres_datestyle(store_type: StorageType) {
        }
    }

    drop(client);
    rx.await.unwrap();

    let _ = fe_pg_server.shutdown().await;
    guard.remove_all().await;
}
@@ -714,8 +728,10 @@ pub async fn test_postgres_timezone(store_type: StorageType) {
    .await
    .unwrap();

    let (tx, rx) = tokio::sync::oneshot::channel();
    tokio::spawn(async move {
        connection.await.unwrap();
        tx.send(()).unwrap();
    });

    let get_row = |mess: Vec<SimpleQueryMessage>| -> String {
@@ -758,6 +774,10 @@ pub async fn test_postgres_timezone(store_type: StorageType) {
        .unwrap(),
    );
    assert_eq!(timezone, "UTC");

    drop(client);
    rx.await.unwrap();

    let _ = fe_pg_server.shutdown().await;
    guard.remove_all().await;
}
@@ -769,8 +789,10 @@ pub async fn test_postgres_parameter_inference(store_type: StorageType) {
    .await
    .unwrap();

    let (tx, rx) = tokio::sync::oneshot::channel();
    tokio::spawn(async move {
        connection.await.unwrap();
        tx.send(()).unwrap();
    });

    // Create demo table
@@ -796,6 +818,10 @@ pub async fn test_postgres_parameter_inference(store_type: StorageType) {

    assert_eq!(1, rows.len());

    // Shutdown the client.
    drop(client);
    rx.await.unwrap();

    let _ = fe_pg_server.shutdown().await;
    guard.remove_all().await;
}

@@ -52,6 +52,14 @@ SELECT * FROM demo ORDER BY ts;
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
+-------+------+--------+---------------------+

DELETE FROM demo;

Affected Rows: 1

COPY DATABASE public FROM '/tmp/demo/export/parquet_range/' LIMIT 2;

Error: 2000(InvalidSyntax), Invalid SQL, error: limit is not supported

DROP TABLE demo;

Affected Rows: 0

@@ -20,4 +20,8 @@ COPY DATABASE public FROM '/tmp/demo/export/parquet_range/';

SELECT * FROM demo ORDER BY ts;

DELETE FROM demo;

COPY DATABASE public FROM '/tmp/demo/export/parquet_range/' LIMIT 2;

DROP TABLE demo;

@@ -93,15 +93,15 @@ select count(*) from without_limit_rows;
| 4 |
+----------+

CREATE TABLE with_limit_rows(host string, cpu double, memory double, ts timestamp time index);
CREATE TABLE with_limit_rows_segment(host string, cpu double, memory double, ts timestamp time index);

Affected Rows: 0

Copy with_limit_rows FROM '/tmp/demo/export/parquet_files/' WITH (MAX_INSERT_ROWS = 2);
Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT 2;

Affected Rows: 2

select count(*) from with_limit_rows;
select count(*) from with_limit_rows_segment;

+----------+
| COUNT(*) |
@@ -109,6 +109,10 @@ select count(*) from with_limit_rows;
| 2 |
+----------+

Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT hello;

Error: 2000(InvalidSyntax), Unexpected token while parsing SQL statement: Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT hello;, expected: 'the number of maximum rows', found: ;: sql parser error: Expected literal int, found: hello at Line: 1, Column 75

drop table demo;

Affected Rows: 0
@@ -133,7 +137,7 @@ drop table without_limit_rows;

Affected Rows: 0

drop table with_limit_rows;
drop table with_limit_rows_segment;

Affected Rows: 0

@@ -34,11 +34,13 @@ Copy without_limit_rows FROM '/tmp/demo/export/parquet_files/';

select count(*) from without_limit_rows;

CREATE TABLE with_limit_rows(host string, cpu double, memory double, ts timestamp time index);
CREATE TABLE with_limit_rows_segment(host string, cpu double, memory double, ts timestamp time index);

Copy with_limit_rows FROM '/tmp/demo/export/parquet_files/' WITH (MAX_INSERT_ROWS = 2);
Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT 2;

select count(*) from with_limit_rows;
select count(*) from with_limit_rows_segment;

Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT hello;

drop table demo;

@@ -52,4 +54,4 @@ drop table with_pattern;

drop table without_limit_rows;

drop table with_limit_rows;
drop table with_limit_rows_segment;

tests/cases/standalone/common/promql/precisions.result (new file, 121 lines)
@@ -0,0 +1,121 @@
CREATE TABLE host_sec (
    ts timestamp(0) time index,
    host STRING PRIMARY KEY,
    val DOUBLE,
);

Affected Rows: 0

INSERT INTO TABLE host_sec VALUES
    (0, 'host1', 1),
    (0, 'host2', 2),
    (5, 'host1', 3),
    (5, 'host2', 4),
    (10, 'host1', 5),
    (10, 'host2', 6),
    (15, 'host1', 7),
    (15, 'host2', 8);

Affected Rows: 8

CREATE TABLE host_micro (
    ts timestamp(6) time index,
    host STRING PRIMARY KEY,
    val DOUBLE,
);

Affected Rows: 0

INSERT INTO TABLE host_micro VALUES
    (0, 'host1', 1),
    (0, 'host2', 2),
    (5000000, 'host1', 3),
    (5000000, 'host2', 4),
    (10000000, 'host1', 5),
    (10000000, 'host2', 6),
    (15000000, 'host1', 7),
    (15000000, 'host2', 8);

Affected Rows: 8

-- Test on Timestamps of different precisions
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"};

+-----+-------+---------------------+
| val | host | ts |
+-----+-------+---------------------+
| 1.0 | host1 | 1970-01-01T00:00:00 |
| 3.0 | host1 | 1970-01-01T00:00:05 |
| 5.0 | host1 | 1970-01-01T00:00:10 |
| 7.0 | host1 | 1970-01-01T00:00:15 |
+-----+-------+---------------------+

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]);

+---------------------+----------------------------------+-------+
| ts | prom_avg_over_time(ts_range,val) | host |
+---------------------+----------------------------------+-------+
| 1970-01-01T00:00:00 | 1.0 | host1 |
| 1970-01-01T00:00:05 | 2.0 | host1 |
| 1970-01-01T00:00:10 | 4.0 | host1 |
| 1970-01-01T00:00:15 | 6.0 | host1 |
+---------------------+----------------------------------+-------+

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_micro{host="host1"};

+-----+-------+---------------------+
| val | host | ts |
+-----+-------+---------------------+
| 1.0 | host1 | 1970-01-01T00:00:00 |
| 3.0 | host1 | 1970-01-01T00:00:05 |
| 5.0 | host1 | 1970-01-01T00:00:10 |
| 7.0 | host1 | 1970-01-01T00:00:15 |
+-----+-------+---------------------+

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_micro{host="host1"}[5s]);

+---------------------+----------------------------------+-------+
| ts | prom_avg_over_time(ts_range,val) | host |
+---------------------+----------------------------------+-------+
| 1970-01-01T00:00:00 | 1.0 | host1 |
| 1970-01-01T00:00:05 | 2.0 | host1 |
| 1970-01-01T00:00:10 | 4.0 | host1 |
| 1970-01-01T00:00:15 | 6.0 | host1 |
+---------------------+----------------------------------+-------+

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"} + host_micro{host="host1"};

+-------+---------------------+-------------------------------+
| host | ts | host_sec.val + host_micro.val |
+-------+---------------------+-------------------------------+
| host1 | 1970-01-01T00:00:00 | 2.0 |
| host1 | 1970-01-01T00:00:05 | 6.0 |
| host1 | 1970-01-01T00:00:10 | 10.0 |
| host1 | 1970-01-01T00:00:15 | 14.0 |
+-------+---------------------+-------------------------------+

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]) + avg_over_time(host_micro{host="host1"}[5s]);

+-------+---------------------+-----------------------------------------------------------------------------------------+
| host | ts | host_sec.prom_avg_over_time(ts_range,val) + host_micro.prom_avg_over_time(ts_range,val) |
+-------+---------------------+-----------------------------------------------------------------------------------------+
| host1 | 1970-01-01T00:00:00 | 2.0 |
| host1 | 1970-01-01T00:00:05 | 4.0 |
| host1 | 1970-01-01T00:00:10 | 8.0 |
| host1 | 1970-01-01T00:00:15 | 12.0 |
+-------+---------------------+-----------------------------------------------------------------------------------------+

DROP TABLE host_sec;

Affected Rows: 0

DROP TABLE host_micro;

Affected Rows: 0

tests/cases/standalone/common/promql/precisions.sql (new file, 55 lines)
@@ -0,0 +1,55 @@
CREATE TABLE host_sec (
    ts timestamp(0) time index,
    host STRING PRIMARY KEY,
    val DOUBLE,
);

INSERT INTO TABLE host_sec VALUES
    (0, 'host1', 1),
    (0, 'host2', 2),
    (5, 'host1', 3),
    (5, 'host2', 4),
    (10, 'host1', 5),
    (10, 'host2', 6),
    (15, 'host1', 7),
    (15, 'host2', 8);

CREATE TABLE host_micro (
    ts timestamp(6) time index,
    host STRING PRIMARY KEY,
    val DOUBLE,
);

INSERT INTO TABLE host_micro VALUES
    (0, 'host1', 1),
    (0, 'host2', 2),
    (5000000, 'host1', 3),
    (5000000, 'host2', 4),
    (10000000, 'host1', 5),
    (10000000, 'host2', 6),
    (15000000, 'host1', 7),
    (15000000, 'host2', 8);

-- Test on Timestamps of different precisions

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"};

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]);

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_micro{host="host1"};

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_micro{host="host1"}[5s]);

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"} + host_micro{host="host1"};

-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]) + avg_over_time(host_micro{host="host1"}[5s]);

DROP TABLE host_sec;

DROP TABLE host_micro;

@@ -98,11 +98,11 @@ Error: 3000(PlanQuery), DataFusion error: Error during planning: duration must b

SELECT min(val) RANGE '5s' FROM host ALIGN (INTERVAL '0' day);

Error: 2000(InvalidSyntax), Range Query: Can't use 0 as align in Range Query
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("0")` in range select query

SELECT min(val) RANGE (INTERVAL '0' day) FROM host ALIGN '5s';

Error: 2000(InvalidSyntax), Range Query: Invalid Range expr `MIN(host.val) RANGE IntervalMonthDayNano("0")`, Can't use 0 as range in Range Query
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("0")` in range select query

DROP TABLE host;

@@ -82,6 +82,30 @@ SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day)
| 2024-01-24T23:00:00 | 3 |
+---------------------+------------------------------------------------------------------+

SELECT ts, min(val) RANGE (INTERVAL '2' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '2' day - INTERVAL '1' day) TO (now() - (now() + INTERVAL '1' hour)) by (1) ORDER BY ts;

+---------------------+-----------------------------------------------------------------------------------------------------------------+
| ts | MIN(host.val) RANGE IntervalMonthDayNano("36893488147419103232") - IntervalMonthDayNano("18446744073709551616") |
+---------------------+-----------------------------------------------------------------------------------------------------------------+
| 2024-01-22T23:00:00 | 0 |
| 2024-01-23T23:00:00 | 1 |
| 2024-01-24T23:00:00 | 3 |
+---------------------+-----------------------------------------------------------------------------------------------------------------+

-- non-positive duration
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '2' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("18446744073709551616") - IntervalMonthDayNano("36893488147419103232")` in range select query

SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("18446744073709551616") - IntervalMonthDayNano("18446744073709551616")` in range select query

-- duration not all interval
SELECT ts, min(val) RANGE (now() - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `now() - IntervalMonthDayNano("18446744073709551616")` in range select query

--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';

@@ -26,6 +26,18 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:0

SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

SELECT ts, min(val) RANGE (INTERVAL '2' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '2' day - INTERVAL '1' day) TO (now() - (now() + INTERVAL '1' hour)) by (1) ORDER BY ts;

-- non-positive duration

SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '2' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

-- duration not all interval

SELECT ts, min(val) RANGE (now() - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;

--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';

@@ -95,3 +95,100 @@ WITH(

Error: 1004(InvalidArguments), Object store not found: S3

CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "");

Affected Rows: 0

CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy");

Affected Rows: 0

show create table phy;

+-------+------------------------------------+
| Table | Create Table |
+-------+------------------------------------+
| phy | CREATE TABLE IF NOT EXISTS "phy" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | "host" STRING NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host") |
| | ) |
| | |
| | ENGINE=metric |
| | WITH( |
| | physical_metric_table = '' |
| | ) |
+-------+------------------------------------+

show create table t1;

+-------+-----------------------------------+
| Table | Create Table |
+-------+-----------------------------------+
| t1 | CREATE TABLE IF NOT EXISTS "t1" ( |
| | "host" STRING NULL, |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host") |
| | ) |
| | |
| | ENGINE=metric |
| | WITH( |
| | on_physical_table = 'phy' |
| | ) |
+-------+-----------------------------------+

drop table t1;

Affected Rows: 0

drop table phy;

Affected Rows: 0

CREATE TABLE IF NOT EXISTS "phy" (
    "ts" TIMESTAMP(3) NOT NULL,
    "val" DOUBLE NULL,
    "__table_id" INT UNSIGNED NOT NULL,
    "__tsid" BIGINT UNSIGNED NOT NULL,
    "host" STRING NULL,
    "job" STRING NULL,
    TIME INDEX ("ts"),
    PRIMARY KEY ("__table_id", "__tsid", "host", "job")
)
ENGINE=mito
WITH(
    physical_metric_table = '',
);

Affected Rows: 0

show create table phy;

+-------+-------------------------------------------------------+
| Table | Create Table |
+-------+-------------------------------------------------------+
| phy | CREATE TABLE IF NOT EXISTS "phy" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | "__table_id" INT UNSIGNED NOT NULL, |
| | "__tsid" BIGINT UNSIGNED NOT NULL, |
| | "host" STRING NULL, |
| | "job" STRING NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("__table_id", "__tsid", "host", "job") |
| | ) |
| | |
| | ENGINE=mito |
| | WITH( |
| | physical_metric_table = '' |
| | ) |
+-------+-------------------------------------------------------+

drop table phy;

Affected Rows: 0

@@ -48,3 +48,34 @@ ENGINE=mito
WITH(
    storage = 'S3'
);

CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "");

CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy");

show create table phy;

show create table t1;

drop table t1;

drop table phy;

CREATE TABLE IF NOT EXISTS "phy" (
    "ts" TIMESTAMP(3) NOT NULL,
    "val" DOUBLE NULL,
    "__table_id" INT UNSIGNED NOT NULL,
    "__tsid" BIGINT UNSIGNED NOT NULL,
    "host" STRING NULL,
    "job" STRING NULL,
    TIME INDEX ("ts"),
    PRIMARY KEY ("__table_id", "__tsid", "host", "job")
)
ENGINE=mito
WITH(
    physical_metric_table = '',
);

show create table phy;

drop table phy;