Compare commits

...

20 Commits

Author SHA1 Message Date
Weny Xu
258675b75e chore: bump to v0.8.0 (#3971) 2024-05-17 15:05:20 +00:00
Weny Xu
11a08cb272 feat(cli): prevent exporting physical table data (#3978)
* feat: prevent exporting physical table data

* chore: apply suggestions from CR
2024-05-17 14:58:10 +00:00
Ruihang Xia
e9b178b8b9 fix: tql parser hang on abnormal input (#3977)
* fix: tql parser hang on abnormal input

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* apply review sugg

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-05-17 14:22:20 +00:00
discord9
3477fde0e5 feat(flow): tumble window func (#3968)
* feat(WIP): tumble window rewrite parser

* tests: tumble func

* feat: add `update_at` column for all flow output

* chore: cleanup per review

* fix: update_at not as time index

* fix: demo tumble

* fix: tests&tumble signature&accept both ts&datetime

* refactor: update_at now ts millis type

* chore: per review advices
2024-05-17 12:10:28 +00:00
dennis zhuang
9baa431656 fix: changing column data type can't process type alias (#3972) 2024-05-17 11:34:31 +00:00
WU Jingdi
e2a1cb5840 feat: support evaluate expr in range query param (#3823)
* feat: support evaluate expr in range query param

* chore: fix comment

* chore: fix code comment

* fix: disable now in duration param
2024-05-17 08:31:55 +00:00
Weny Xu
f696f41a02 fix: prevent registering logical regions with AliveKeeper (#3965)
* fix: register logical region

* chore: fix Clippy

* chore: apply suggestions from CR
2024-05-17 07:38:35 +00:00
Weny Xu
0168d43d60 fix: prevent exporting metric physical table data (#3970) 2024-05-17 07:19:28 +00:00
Yingwen
e372e25e30 build: add RUSTUP_WINDOWS_PATH_ADD_BIN env (#3969)
build: add RUSTUP_WINDOWS_PATH_ADD_BIN: 1
2024-05-17 06:01:46 +00:00
zyy17
ca409a732f refactor(naming): use the better naming for pubsub (#3960) 2024-05-17 03:00:15 +00:00
Ruihang Xia
5c0a530ad1 feat: skip read-only region when trying to flush on region full (#3966)
* feat: skip read-only region when trying to flush on region full

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* improve log

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* also skip in periodically

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-05-16 14:56:43 +00:00
Ruihang Xia
4b030456f6 feat: remove timeout in the channel between frontend and datanode (#3962)
* style: change builder pattern

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: remove timeout

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unused config

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update docs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-05-16 14:12:42 +00:00
irenjj
f93b5b19f0 feat: limit total rows copied in COPY TABLE FROM with LIMIT segment (#3910)
* feat: limit total rows copied in COPY TABLE FROM with LIMIT segment

* fmt

* disable default limit

* fix: check parse

* fix test, add error case

* fix: forbid LIMIT in database

* fix: only support LIMIT segment

* fix: simplify

* fix

* fix

* fix

* fix

* fix: test

* fix: change error info

* fix clippy

* fix: fix error msg

* fix test

* fix: test error info
2024-05-16 13:39:26 +00:00
Yingwen
669a6d84e9 test: gracefully shutdown postgres client in sql tests (#3958)
* chore: debug log

* test: gracefully shutdown pg client
2024-05-16 11:50:45 +00:00
discord9
a45017ad71 feat(flow): expire arrange according to time_index type (#3956)
* feat: render_reduce's arrangement expire after time passed

* feat: set expire when create flow
2024-05-16 11:41:03 +00:00
discord9
0d9e71b653 feat(flow): flow node manager (#3954)
* feat(flow): flow node manager

feat(flow): render src/sink

feat(flow): flow node manager in standalone

fix?: higher run freq

chore: remove redundant error enum variant

fix: run with higher freq if insert more

chore: fix after rebase

chore: typos

* chore(WIP): per review

* chore: per review
2024-05-16 11:37:14 +00:00
discord9
93f178f3ad feat(flow): avg func rewrite to sum/count (#3955)
* feat(WIP): parse avg

* feat: RelationType::apply_mfp no need expr typs

* feat: avg&tests

* fix(WIP): avg eval

* fix: sum ret correct type

* chore: typos
2024-05-16 10:03:56 +00:00
WU Jingdi
9f4a6c6fe2 feat: support any precision in PromQL (#3933)
* feat: support any precision in PromQL

* chore: add test
2024-05-16 07:00:24 +00:00
Weny Xu
c915916b62 feat(cli): export metric physical tables first (#3949)
* feat: export metric physical tables first

* chore: apply suggestions from CR
2024-05-16 06:30:20 +00:00
Weny Xu
dff7ba7598 feat: ignore internal columns in SHOW CREATE TABLE (#3950)
* feat: ignore internal columns

* chore: add new line

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-05-16 06:28:48 +00:00
90 changed files with 4060 additions and 583 deletions

View File

@@ -59,6 +59,9 @@ runs:
if: ${{ inputs.disable-run-tests == 'false' }}
shell: pwsh
run: make test sqlness-test
env:
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
RUST_BACKTRACE: 1
- name: Upload sqlness logs
if: ${{ failure() }} # Only upload logs when the integration tests failed.

View File

@@ -104,6 +104,7 @@ jobs:
CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}

View File

@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.8.0
NEXT_RELEASE_VERSION: v0.9.0
jobs:
allocate-runners:

Cargo.lock (generated), 158 changes
View File

@@ -214,7 +214,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"common-base",
"common-decimal",
@@ -703,7 +703,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -877,7 +877,7 @@ dependencies = [
[[package]]
name = "benchmarks"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arrow",
@@ -1220,7 +1220,7 @@ dependencies = [
[[package]]
name = "cache"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"catalog",
"common-error",
@@ -1254,7 +1254,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arrow",
@@ -1540,7 +1540,7 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
[[package]]
name = "client"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arc-swap",
@@ -1569,7 +1569,7 @@ dependencies = [
"serde_json",
"snafu 0.8.2",
"substrait 0.17.1",
"substrait 0.7.2",
"substrait 0.8.0",
"tokio",
"tokio-stream",
"tonic 0.11.0",
@@ -1599,7 +1599,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"auth",
@@ -1613,6 +1613,7 @@ dependencies = [
"common-catalog",
"common-config",
"common-error",
"common-grpc",
"common-macro",
"common-meta",
"common-procedure",
@@ -1629,6 +1630,7 @@ dependencies = [
"either",
"etcd-client",
"file-engine",
"flow",
"frontend",
"futures",
"human-panic",
@@ -1653,7 +1655,7 @@ dependencies = [
"session",
"snafu 0.8.2",
"store-api",
"substrait 0.7.2",
"substrait 0.8.0",
"table",
"temp-env",
"tempfile",
@@ -1697,7 +1699,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"anymap",
"bitvec",
@@ -1713,7 +1715,7 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"chrono",
"common-error",
@@ -1724,7 +1726,7 @@ dependencies = [
[[package]]
name = "common-config"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"common-base",
"common-error",
@@ -1747,7 +1749,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"arrow",
"arrow-schema",
@@ -1779,7 +1781,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"bigdecimal",
"common-error",
@@ -1792,7 +1794,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"snafu 0.8.2",
"strum 0.25.0",
@@ -1800,7 +1802,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -1815,7 +1817,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arc-swap",
@@ -1848,7 +1850,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"common-runtime",
@@ -1865,7 +1867,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arrow-flight",
@@ -1891,7 +1893,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"common-base",
@@ -1908,7 +1910,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"arc-swap",
"common-query",
@@ -1923,7 +1925,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"common-error",
"common-macro",
@@ -1936,7 +1938,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"anymap2",
"api",
@@ -1989,11 +1991,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "0.7.2"
version = "0.8.0"
[[package]]
name = "common-procedure"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-stream",
"async-trait",
@@ -2018,7 +2020,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -2026,7 +2028,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -2041,7 +2043,7 @@ dependencies = [
"datatypes",
"serde",
"snafu 0.8.2",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
"sqlparser_derive 0.1.1",
"statrs",
"tokio",
@@ -2049,7 +2051,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"arc-swap",
"common-error",
@@ -2068,7 +2070,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"common-error",
@@ -2088,7 +2090,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"atty",
"backtrace",
@@ -2115,7 +2117,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"client",
"common-query",
@@ -2127,7 +2129,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"arrow",
"chrono",
@@ -2143,7 +2145,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"build-data",
"schemars",
@@ -2152,7 +2154,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"common-base",
"common-error",
@@ -3152,7 +3154,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arrow-flight",
@@ -3201,7 +3203,7 @@ dependencies = [
"session",
"snafu 0.8.2",
"store-api",
"substrait 0.7.2",
"substrait 0.8.0",
"table",
"tokio",
"toml 0.8.12",
@@ -3210,7 +3212,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"arrow",
"arrow-array",
@@ -3721,7 +3723,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -3823,7 +3825,7 @@ dependencies = [
[[package]]
name = "flow"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -3833,8 +3835,11 @@ dependencies = [
"common-decimal",
"common-error",
"common-frontend",
"common-function",
"common-macro",
"common-meta",
"common-query",
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-time",
@@ -3861,7 +3866,7 @@ dependencies = [
"snafu 0.8.2",
"store-api",
"strum 0.25.0",
"substrait 0.7.2",
"substrait 0.8.0",
"table",
"tokio",
"tonic 0.11.0",
@@ -3899,7 +3904,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arc-swap",
@@ -3945,7 +3950,7 @@ dependencies = [
"session",
"snafu 0.8.2",
"sql",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
"store-api",
"strfmt",
"table",
@@ -4717,7 +4722,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -5284,7 +5289,7 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "log-store"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-stream",
"async-trait",
@@ -5580,7 +5585,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -5606,7 +5611,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -5682,7 +5687,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"aquamarine",
@@ -5764,7 +5769,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"aquamarine",
@@ -6383,7 +6388,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"anyhow",
"async-trait",
@@ -6624,7 +6629,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -6668,9 +6673,9 @@ dependencies = [
"session",
"snafu 0.8.2",
"sql",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
"store-api",
"substrait 0.7.2",
"substrait 0.8.0",
"table",
"tokio",
"tonic 0.11.0",
@@ -6914,7 +6919,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"async-trait",
@@ -6930,7 +6935,7 @@ dependencies = [
"serde_json",
"snafu 0.8.2",
"sql",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
"store-api",
"table",
]
@@ -7260,7 +7265,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"auth",
"common-base",
@@ -7538,7 +7543,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"ahash 0.8.11",
"async-recursion",
@@ -7751,7 +7756,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"bitflags 2.5.0",
@@ -7862,7 +7867,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -7919,7 +7924,7 @@ dependencies = [
"stats-cli",
"store-api",
"streaming-stats",
"substrait 0.7.2",
"substrait 0.8.0",
"table",
"tokio",
"tokio-stream",
@@ -9226,7 +9231,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "script"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arc-swap",
@@ -9496,7 +9501,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"aide",
"api",
@@ -9600,7 +9605,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arc-swap",
@@ -9878,7 +9883,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"chrono",
@@ -9901,7 +9906,7 @@ dependencies = [
"lazy_static",
"regex",
"snafu 0.8.2",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
"sqlparser_derive 0.1.1",
"table",
]
@@ -9934,7 +9939,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"clap 4.5.4",
@@ -9965,13 +9970,13 @@ dependencies = [
[[package]]
name = "sqlparser"
version = "0.44.0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0#c919990bf62ad38d2b0c0a3bc90b26ad919d51b0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d#e4e496b8d62416ad50ce70a1b460c7313610cf5d"
dependencies = [
"lazy_static",
"log",
"regex",
"sqlparser 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
]
[[package]]
@@ -9999,7 +10004,7 @@ dependencies = [
[[package]]
name = "sqlparser_derive"
version = "0.2.2"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0#c919990bf62ad38d2b0c0a3bc90b26ad919d51b0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d#e4e496b8d62416ad50ce70a1b460c7313610cf5d"
dependencies = [
"proc-macro2",
"quote",
@@ -10152,7 +10157,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"aquamarine",
@@ -10318,7 +10323,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"bytes",
@@ -10509,7 +10514,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"async-trait",
"chrono",
@@ -10618,7 +10623,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-fuzz"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"arbitrary",
"async-trait",
@@ -10643,7 +10648,7 @@ dependencies = [
"serde_json",
"snafu 0.8.2",
"sql",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=c919990bf62ad38d2b0c0a3bc90b26ad919d51b0)",
"sqlparser 0.44.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e4e496b8d62416ad50ce70a1b460c7313610cf5d)",
"sqlx",
"tinytemplate",
"tokio",
@@ -10651,7 +10656,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.7.2"
version = "0.8.0"
dependencies = [
"api",
"arrow-flight",
@@ -10683,6 +10688,7 @@ dependencies = [
"datanode",
"datatypes",
"dotenv",
"flow",
"frontend",
"futures",
"futures-util",
@@ -10709,7 +10715,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.7.2",
"substrait 0.8.0",
"table",
"tempfile",
"time",

View File

@@ -64,7 +64,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.7.2"
version = "0.8.0"
edition = "2021"
license = "Apache-2.0"
@@ -159,7 +159,7 @@ smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "c919990bf62ad38d2b0c0a3bc90b26ad919d51b0", features = [
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e4e496b8d62416ad50ce70a1b460c7313610cf5d", features = [
"visitor",
] }
strum = { version = "0.25", features = ["derive"] }

View File

@@ -186,7 +186,6 @@
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
| `datanode` | -- | -- | Datanode options. |
| `datanode.client` | -- | -- | Datanode client options. |
| `datanode.client.timeout` | String | `10s` | -- |
| `datanode.client.connect_timeout` | String | `10s` | -- |
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
| `logging` | -- | -- | The logging options. |

View File

@@ -136,7 +136,6 @@ metadata_cache_tti = "5m"
[datanode]
## Datanode client options.
[datanode.client]
timeout = "10s"
connect_timeout = "10s"
tcp_nodelay = true

View File

@@ -18,7 +18,7 @@ use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
use snafu::{location, Location, Snafu};
use tonic::{Code, Status};
#[derive(Snafu)]
@@ -83,14 +83,28 @@ pub enum Error {
},
#[snafu(display("Failed to request RegionServer, code: {}", code))]
RegionServer { code: Code, source: BoxedError },
RegionServer {
code: Code,
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
// Server error carried in Tonic Status's metadata.
#[snafu(display("{}", msg))]
Server { code: StatusCode, msg: String },
Server {
code: StatusCode,
msg: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Illegal Database response: {err_msg}"))]
IllegalDatabaseResponse { err_msg: String },
IllegalDatabaseResponse {
err_msg: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to send request with streaming: {}", err_msg))]
ClientStreaming {
@@ -148,7 +162,11 @@ impl From<Status> for Error {
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
.unwrap_or_else(|| e.message().to_string());
Self::Server { code, msg }
Self::Server {
code,
msg,
location: location!(),
}
}
}

View File

@@ -189,6 +189,7 @@ impl RegionRequester {
error::Error::RegionServer {
code,
source: BoxedError::new(err),
location: location!(),
}
})?
.into_inner();
@@ -272,7 +273,7 @@ mod test {
err_msg: "blabla".to_string(),
}),
}));
let Server { code, msg } = result.unwrap_err() else {
let Server { code, msg, .. } = result.unwrap_err() else {
unreachable!()
};
assert_eq!(code, StatusCode::Internal);

View File

@@ -28,6 +28,7 @@ common-base.workspace = true
common-catalog.workspace = true
common-config.workspace = true
common-error.workspace = true
common-grpc.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-procedure.workspace = true
@@ -45,6 +46,7 @@ datatypes.workspace = true
either = "1.8"
etcd-client.workspace = true
file-engine.workspace = true
flow.workspace = true
frontend.workspace = true
futures.workspace = true
human-panic = "1.2.2"

View File

@@ -64,6 +64,10 @@ impl App for Instance {
self.tool.do_work().await
}
fn wait_signal(&self) -> bool {
false
}
async fn stop(&self) -> Result<()> {
Ok(())
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::path::Path;
use std::sync::Arc;
@@ -28,6 +29,7 @@ use snafu::{OptionExt, ResultExt};
use tokio::fs::File;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::sync::Semaphore;
use tokio::time::Instant;
use crate::cli::{Instance, Tool};
use crate::error::{
@@ -174,8 +176,34 @@ impl Export {
}
/// Return a list of [`TableReference`] to be exported.
/// Includes all tables under the given `catalog` and `schema`
async fn get_table_list(&self, catalog: &str, schema: &str) -> Result<Vec<TableReference>> {
/// Includes all tables under the given `catalog` and `schema`.
async fn get_table_list(
&self,
catalog: &str,
schema: &str,
) -> Result<(Vec<TableReference>, Vec<TableReference>)> {
// Puts all metric table first
let sql = format!(
"select table_catalog, table_schema, table_name from \
information_schema.columns where column_name = '__tsid' \
and table_catalog = \'{catalog}\' and table_schema = \'{schema}\'"
);
let result = self.sql(&sql).await?;
let Some(records) = result else {
EmptyResultSnafu.fail()?
};
let mut metric_physical_tables = HashSet::with_capacity(records.len());
for value in records {
let mut t = Vec::with_capacity(3);
for v in &value {
let serde_json::Value::String(value) = v else {
unreachable!()
};
t.push(value);
}
metric_physical_tables.insert((t[0].clone(), t[1].clone(), t[2].clone()));
}
// TODO: SQL injection hurts
let sql = format!(
"select table_catalog, table_schema, table_name from \
@@ -190,10 +218,10 @@ impl Export {
debug!("Fetched table list: {:?}", records);
if records.is_empty() {
return Ok(vec![]);
return Ok((vec![], vec![]));
}
let mut result = Vec::with_capacity(records.len());
let mut remaining_tables = Vec::with_capacity(records.len());
for value in records {
let mut t = Vec::with_capacity(3);
for v in &value {
@@ -202,10 +230,17 @@ impl Export {
};
t.push(value);
}
result.push((t[0].clone(), t[1].clone(), t[2].clone()));
let table = (t[0].clone(), t[1].clone(), t[2].clone());
// Ignores the physical table
if !metric_physical_tables.contains(&table) {
remaining_tables.push(table);
}
}
Ok(result)
Ok((
metric_physical_tables.into_iter().collect(),
remaining_tables,
))
}
async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {
@@ -225,6 +260,7 @@ impl Export {
}
async fn export_create_table(&self) -> Result<()> {
let timer = Instant::now();
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.iter_db_names().await?;
let db_count = db_names.len();
@@ -233,15 +269,16 @@ impl Export {
let semaphore_moved = semaphore.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
let table_list = self.get_table_list(&catalog, &schema).await?;
let table_count = table_list.len();
let (metric_physical_tables, remaining_tables) =
self.get_table_list(&catalog, &schema).await?;
let table_count = metric_physical_tables.len() + remaining_tables.len();
tokio::fs::create_dir_all(&self.output_dir)
.await
.context(FileIoSnafu)?;
let output_file =
Path::new(&self.output_dir).join(format!("{catalog}-{schema}.sql"));
let mut file = File::create(output_file).await.context(FileIoSnafu)?;
for (c, s, t) in table_list {
for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
match self.show_create_table(&c, &s, &t).await {
Err(e) => {
error!(e; r#"Failed to export table "{}"."{}"."{}""#, c, s, t)
@@ -270,12 +307,14 @@ impl Export {
})
.count();
info!("success {success}/{db_count} jobs");
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, cost: {:?}", elapsed);
Ok(())
}
async fn export_table_data(&self) -> Result<()> {
let timer = Instant::now();
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.iter_db_names().await?;
let db_count = db_names.len();
@@ -288,15 +327,25 @@ impl Export {
.await
.context(FileIoSnafu)?;
let output_dir = Path::new(&self.output_dir).join(format!("{catalog}-{schema}/"));
// copy database to
let sql = format!(
"copy database {} to '{}' with (format='parquet');",
schema,
output_dir.to_str().unwrap()
);
self.sql(&sql).await?;
info!("finished exporting {catalog}.{schema} data");
// Ignores metric physical tables
let (metrics_tables, table_list) = self.get_table_list(&catalog, &schema).await?;
for (_, _, table_name) in metrics_tables {
warn!("Ignores metric physical table: {table_name}");
}
for (catalog_name, schema_name, table_name) in table_list {
// copy table to
let sql = format!(
r#"Copy "{}"."{}"."{}" TO '{}{}.parquet' WITH (format='parquet');"#,
catalog_name,
schema_name,
table_name,
output_dir.to_str().unwrap(),
table_name,
);
info!("Executing sql: {sql}");
self.sql(&sql).await?;
}
info!("Finished exporting {catalog}.{schema} data");
// export copy from sql
let dir_filenames = match output_dir.read_dir() {
@@ -351,8 +400,8 @@ impl Export {
}
})
.count();
info!("success {success}/{db_count} jobs");
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {:?}", elapsed);
Ok(())
}
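For reference, a minimal sketch (illustrative values only) of the statement the per-table format! above produces, assuming catalog "greptime", schema "public", table "cpu", and a schema output directory of "/tmp/export/greptime-public/":

// Illustrative values only; mirrors the format! call in export_table_data above.
let sql = format!(
    r#"Copy "{}"."{}"."{}" TO '{}{}.parquet' WITH (format='parquet');"#,
    "greptime", "public", "cpu", "/tmp/export/greptime-public/", "cpu",
);
// => Copy "greptime"."public"."cpu" TO '/tmp/export/greptime-public/cpu.parquet' WITH (format='parquet');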

View File

@@ -24,6 +24,7 @@ use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager, Me
use clap::Parser;
use client::client_manager::DatanodeClients;
use common_config::Configurable;
use common_grpc::channel_manager::ChannelConfig;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
@@ -318,11 +319,19 @@ impl StartCommand {
Arc::new(executor),
);
// frontend to datanode need not timeout.
// Some queries are expected to take long time.
let channel_config = ChannelConfig {
timeout: None,
..Default::default()
};
let client = DatanodeClients::new(channel_config);
let mut instance = FrontendBuilder::new(
cached_meta_backend.clone(),
layered_cache_registry.clone(),
catalog_manager,
Arc::new(DatanodeClients::default()),
Arc::new(client),
meta_client,
)
.with_plugin(plugins.clone())

View File

@@ -41,6 +41,11 @@ pub trait App: Send {
async fn start(&mut self) -> error::Result<()>;
/// Waits the quit signal by default.
fn wait_signal(&self) -> bool {
true
}
async fn stop(&self) -> error::Result<()>;
}
@@ -51,11 +56,13 @@ pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {
app.start().await?;
if let Err(e) = tokio::signal::ctrl_c().await {
error!("Failed to listen for ctrl-c signal: {}", e);
// It's unusual to fail to listen for ctrl-c signal, maybe there's something unexpected in
// the underlying system. So we stop the app instead of running nonetheless to let people
// investigate the issue.
if app.wait_signal() {
if let Err(e) = tokio::signal::ctrl_c().await {
error!("Failed to listen for ctrl-c signal: {}", e);
// It's unusual to fail to listen for ctrl-c signal, maybe there's something unexpected in
// the underlying system. So we stop the app instead of running nonetheless to let people
// investigate the issue.
}
}
app.stop().await?;

View File

@@ -45,6 +45,7 @@ use common_wal::config::StandaloneWalConfig;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::FlownodeBuilder;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
@@ -426,11 +427,26 @@ impl StartCommand {
)
.await;
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let flow_builder = FlownodeBuilder::new(
1,
Default::default(),
fe_plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
);
let flownode = Arc::new(flow_builder.build().await);
let builder =
DatanodeBuilder::new(dn_opts, fe_plugins.clone()).with_kv_backend(kv_backend.clone());
let datanode = builder.build().await.context(StartDatanodeSnafu)?;
let node_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));
let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(),
flow_server: flownode.clone(),
});
let table_id_sequence = Arc::new(
SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())
@@ -448,8 +464,6 @@ impl StartCommand {
opts.wal.into(),
kv_backend.clone(),
));
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
table_id_sequence,
@@ -482,6 +496,13 @@ impl StartCommand {
.await
.context(StartFrontendSnafu)?;
// flow server need to be able to use frontend to write insert requests back
flownode
.set_frontend_invoker(Box::new(frontend.clone()))
.await;
// TODO(discord9): unify with adding `start` and `shutdown` method to flownode too.
let _handle = flownode.clone().run_background();
let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
.build()
.await

View File

@@ -291,88 +291,68 @@ impl ChannelConfig {
}
/// A timeout to each request.
pub fn timeout(self, timeout: Duration) -> Self {
Self {
timeout: Some(timeout),
..self
}
pub fn timeout(mut self, timeout: Duration) -> Self {
self.timeout = Some(timeout);
self
}
/// A timeout to connecting to the uri.
///
/// Defaults to no timeout.
pub fn connect_timeout(self, timeout: Duration) -> Self {
Self {
connect_timeout: Some(timeout),
..self
}
pub fn connect_timeout(mut self, timeout: Duration) -> Self {
self.connect_timeout = Some(timeout);
self
}
/// A concurrency limit to each request.
pub fn concurrency_limit(self, limit: usize) -> Self {
Self {
concurrency_limit: Some(limit),
..self
}
pub fn concurrency_limit(mut self, limit: usize) -> Self {
self.concurrency_limit = Some(limit);
self
}
/// A rate limit to each request.
pub fn rate_limit(self, limit: u64, duration: Duration) -> Self {
Self {
rate_limit: Some((limit, duration)),
..self
}
pub fn rate_limit(mut self, limit: u64, duration: Duration) -> Self {
self.rate_limit = Some((limit, duration));
self
}
/// Sets the SETTINGS_INITIAL_WINDOW_SIZE option for HTTP2 stream-level flow control.
/// Default is 65,535
pub fn initial_stream_window_size(self, size: u32) -> Self {
Self {
initial_stream_window_size: Some(size),
..self
}
pub fn initial_stream_window_size(mut self, size: u32) -> Self {
self.initial_stream_window_size = Some(size);
self
}
/// Sets the max connection-level flow control for HTTP2
///
/// Default is 65,535
pub fn initial_connection_window_size(self, size: u32) -> Self {
Self {
initial_connection_window_size: Some(size),
..self
}
pub fn initial_connection_window_size(mut self, size: u32) -> Self {
self.initial_connection_window_size = Some(size);
self
}
/// Set http2 KEEP_ALIVE_INTERVAL. Uses hypers default otherwise.
pub fn http2_keep_alive_interval(self, duration: Duration) -> Self {
Self {
http2_keep_alive_interval: Some(duration),
..self
}
pub fn http2_keep_alive_interval(mut self, duration: Duration) -> Self {
self.http2_keep_alive_interval = Some(duration);
self
}
/// Set http2 KEEP_ALIVE_TIMEOUT. Uses hypers default otherwise.
pub fn http2_keep_alive_timeout(self, duration: Duration) -> Self {
Self {
http2_keep_alive_timeout: Some(duration),
..self
}
pub fn http2_keep_alive_timeout(mut self, duration: Duration) -> Self {
self.http2_keep_alive_timeout = Some(duration);
self
}
/// Set http2 KEEP_ALIVE_WHILE_IDLE. Uses hypers default otherwise.
pub fn http2_keep_alive_while_idle(self, enabled: bool) -> Self {
Self {
http2_keep_alive_while_idle: Some(enabled),
..self
}
pub fn http2_keep_alive_while_idle(mut self, enabled: bool) -> Self {
self.http2_keep_alive_while_idle = Some(enabled);
self
}
/// Sets whether to use an adaptive flow control. Uses hypers default otherwise.
pub fn http2_adaptive_window(self, enabled: bool) -> Self {
Self {
http2_adaptive_window: Some(enabled),
..self
}
pub fn http2_adaptive_window(mut self, enabled: bool) -> Self {
self.http2_adaptive_window = Some(enabled);
self
}
/// Set whether TCP keepalive messages are enabled on accepted connections.
@@ -381,31 +361,25 @@ impl ChannelConfig {
/// will be the time to remain idle before sending TCP keepalive probes.
///
/// Default is no keepalive (None)
pub fn tcp_keepalive(self, duration: Duration) -> Self {
Self {
tcp_keepalive: Some(duration),
..self
}
pub fn tcp_keepalive(mut self, duration: Duration) -> Self {
self.tcp_keepalive = Some(duration);
self
}
/// Set the value of TCP_NODELAY option for accepted connections.
///
/// Enabled by default.
pub fn tcp_nodelay(self, enabled: bool) -> Self {
Self {
tcp_nodelay: enabled,
..self
}
pub fn tcp_nodelay(mut self, enabled: bool) -> Self {
self.tcp_nodelay = enabled;
self
}
/// Set the value of tls client auth.
///
/// Disabled by default.
pub fn client_tls_config(self, client_tls_option: ClientTlsOption) -> Self {
Self {
client_tls: Some(client_tls_option),
..self
}
pub fn client_tls_config(mut self, client_tls_option: ClientTlsOption) -> Self {
self.client_tls = Some(client_tls_option);
self
}
}
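A minimal usage sketch of the reworked by-value setters above (ChannelConfig implements Default, as the struct-update calls elsewhere in this change rely on):

use std::time::Duration;

// Chaining the mut-self setters; method names as shown in the diff above.
let config = ChannelConfig::default()
    .connect_timeout(Duration::from_secs(10))
    .tcp_nodelay(true);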

View File

@@ -119,12 +119,11 @@ impl CreateFlowProcedure {
&sink_table_name.table_name,
))
.await?;
ensure!(
!exists,
error::TableAlreadyExistsSnafu {
table_name: sink_table_name.to_string(),
}
);
// TODO(discord9): due to undefined behavior in flow's plan in how to transform types in mfp, sometime flow can't deduce correct schema
// and require manually create sink table
if exists {
common_telemetry::warn!("Table already exists, table: {}", sink_table_name);
}
self.collect_source_tables().await?;
self.allocate_flow_id().await?;

View File

@@ -516,6 +516,7 @@ mod tests {
use common_meta::key::datanode_table::DatanodeTableManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackendRef;
use mito2::engine::MITO_ENGINE_NAME;
use store_api::region_request::RegionRequest;
use store_api::storage::RegionId;
@@ -528,7 +529,7 @@ mod tests {
let txn = mgr
.build_create_txn(
1028,
"mock",
MITO_ENGINE_NAME,
"foo/bar/weny",
HashMap::from([("foo".to_string(), "bar".to_string())]),
HashMap::default(),
@@ -542,8 +543,9 @@ mod tests {
#[tokio::test]
async fn test_initialize_region_server() {
common_telemetry::init_default_ut_logging();
let mut mock_region_server = mock_region_server();
let (mock_region, mut mock_region_handler) = MockRegionEngine::new();
let (mock_region, mut mock_region_handler) = MockRegionEngine::new(MITO_ENGINE_NAME);
mock_region_server.register_engine(mock_region.clone());

View File

@@ -121,6 +121,7 @@ mod tests {
use std::time::Duration;
use common_meta::instruction::{InstructionReply, UpgradeRegion};
use mito2::engine::MITO_ENGINE_NAME;
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;
use tokio::time::Instant;
@@ -133,7 +134,7 @@ mod tests {
#[tokio::test]
async fn test_region_not_exist() {
let mut mock_region_server = mock_region_server();
let (mock_engine, _) = MockRegionEngine::new();
let (mock_engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
mock_region_server.register_engine(mock_engine);
let handler_context = HandlerContext {
@@ -167,13 +168,14 @@ mod tests {
let mock_region_server = mock_region_server();
let region_id = RegionId::new(1024, 1);
let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
region_engine.mock_role = Some(Some(RegionRole::Leader));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
// Should be unreachable.
unreachable!();
}));
});
let (mock_engine, _) =
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
region_engine.mock_role = Some(Some(RegionRole::Leader));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
// Should be unreachable.
unreachable!();
}));
});
mock_region_server.register_test_region(region_id, mock_engine);
let handler_context = HandlerContext {
@@ -207,13 +209,14 @@ mod tests {
let mock_region_server = mock_region_server();
let region_id = RegionId::new(1024, 1);
let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
// Region is not ready.
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
// Note: Don't change.
region_engine.handle_request_delay = Some(Duration::from_secs(100));
});
let (mock_engine, _) =
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
// Region is not ready.
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
// Note: Don't change.
region_engine.handle_request_delay = Some(Duration::from_secs(100));
});
mock_region_server.register_test_region(region_id, mock_engine);
let handler_context = HandlerContext {
@@ -247,13 +250,14 @@ mod tests {
let mock_region_server = mock_region_server();
let region_id = RegionId::new(1024, 1);
let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
// Region is not ready.
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
// Note: Don't change.
region_engine.handle_request_delay = Some(Duration::from_millis(300));
});
let (mock_engine, _) =
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
// Region is not ready.
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| Ok(0)));
// Note: Don't change.
region_engine.handle_request_delay = Some(Duration::from_millis(300));
});
mock_region_server.register_test_region(region_id, mock_engine);
let waits = vec![
@@ -308,18 +312,19 @@ mod tests {
let mock_region_server = mock_region_server();
let region_id = RegionId::new(1024, 1);
let (mock_engine, _) = MockRegionEngine::with_custom_apply_fn(|region_engine| {
// Region is not ready.
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
error::UnexpectedSnafu {
violated: "mock_error".to_string(),
}
.fail()
}));
// Note: Don't change.
region_engine.handle_request_delay = Some(Duration::from_millis(100));
});
let (mock_engine, _) =
MockRegionEngine::with_custom_apply_fn(MITO_ENGINE_NAME, |region_engine| {
// Region is not ready.
region_engine.mock_role = Some(Some(RegionRole::Follower));
region_engine.handle_request_mock_fn = Some(Box::new(|_, _| {
error::UnexpectedSnafu {
violated: "mock_error".to_string(),
}
.fail()
}));
// Note: Don't change.
region_engine.handle_request_delay = Some(Duration::from_millis(100));
});
mock_region_server.register_test_region(region_id, mock_engine);
let handler_context = HandlerContext {

View File

@@ -34,6 +34,7 @@ use common_telemetry::{info, warn};
use dashmap::DashMap;
use futures_util::future::try_join_all;
use metric_engine::engine::MetricEngine;
use mito2::engine::MITO_ENGINE_NAME;
use prost::Message;
pub use query::dummy_catalog::{
DummyCatalogList, DummyTableProviderFactory, TableProviderFactoryRef,
@@ -44,7 +45,9 @@ use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
use servers::grpc::region_server::RegionServerHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use store_api::metric_engine_consts::{METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY};
use store_api::metric_engine_consts::{
FILE_ENGINE_NAME, LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME,
};
use store_api::region_engine::{RegionEngineRef, RegionRole, SetReadonlyResponse};
use store_api::region_request::{AffectedRows, RegionCloseRequest, RegionRequest};
use store_api::storage::RegionId;
@@ -403,7 +406,7 @@ impl RegionServerInner {
let current_region_status = self.region_map.get(&region_id);
let engine = match region_change {
RegionChange::Register(ref engine_type, _) => match current_region_status {
RegionChange::Register(attribute) => match current_region_status {
Some(status) => match status.clone() {
RegionEngineWithStatus::Registering(_) => {
return Ok(CurrentEngine::EarlyReturn(0))
@@ -417,8 +420,10 @@ impl RegionServerInner {
.engines
.read()
.unwrap()
.get(engine_type)
.with_context(|| RegionEngineNotFoundSnafu { name: engine_type })?
.get(attribute.engine())
.with_context(|| RegionEngineNotFoundSnafu {
name: attribute.engine(),
})?
.clone(),
},
RegionChange::Deregisters => match current_region_status {
@@ -461,11 +466,13 @@ impl RegionServerInner {
.start_timer();
let region_change = match &request {
RegionRequest::Create(create) => RegionChange::Register(create.engine.clone(), false),
RegionRequest::Create(create) => {
let attribute = parse_region_attribute(&create.engine, &create.options)?;
RegionChange::Register(attribute)
}
RegionRequest::Open(open) => {
let is_opening_physical_region =
open.options.contains_key(PHYSICAL_TABLE_METADATA_KEY);
RegionChange::Register(open.engine.clone(), is_opening_physical_region)
let attribute = parse_region_attribute(&open.engine, &open.options)?;
RegionChange::Register(attribute)
}
RegionRequest::Close(_) | RegionRequest::Drop(_) => RegionChange::Deregisters,
RegionRequest::Put(_)
@@ -514,7 +521,7 @@ impl RegionServerInner {
region_change: &RegionChange,
) {
match region_change {
RegionChange::Register(_, _) => {
RegionChange::Register(_) => {
self.region_map.insert(
region_id,
RegionEngineWithStatus::Registering(engine.clone()),
@@ -533,7 +540,7 @@ impl RegionServerInner {
fn unset_region_status(&self, region_id: RegionId, region_change: RegionChange) {
match region_change {
RegionChange::None => {}
RegionChange::Register(_, _) | RegionChange::Deregisters => {
RegionChange::Register(_) | RegionChange::Deregisters => {
self.region_map.remove(&region_id);
}
}
@@ -548,15 +555,28 @@ impl RegionServerInner {
let engine_type = engine.name();
match region_change {
RegionChange::None => {}
RegionChange::Register(_, is_opening_physical_region) => {
if is_opening_physical_region {
self.register_logical_regions(&engine, region_id).await?;
}
info!("Region {region_id} is registered to engine {engine_type}");
RegionChange::Register(attribute) => {
info!(
"Region {region_id} is registered to engine {}",
attribute.engine()
);
self.region_map
.insert(region_id, RegionEngineWithStatus::Ready(engine));
self.event_listener.on_region_registered(region_id);
.insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
match attribute {
RegionAttribute::Metric { physical } => {
if physical {
// Registers the logical regions belong to the physical region (`region_id`).
self.register_logical_regions(&engine, region_id).await?;
// We only send the `on_region_registered` event of the physical region.
self.event_listener.on_region_registered(region_id);
}
}
RegionAttribute::Mito => self.event_listener.on_region_registered(region_id),
RegionAttribute::File => {
// do nothing
}
}
}
RegionChange::Deregisters => {
info!("Region {region_id} is deregistered from engine {engine_type}");
@@ -699,10 +719,45 @@ impl RegionServerInner {
enum RegionChange {
None,
Register(String, bool),
Register(RegionAttribute),
Deregisters,
}
fn parse_region_attribute(
engine: &str,
options: &HashMap<String, String>,
) -> Result<RegionAttribute> {
match engine {
MITO_ENGINE_NAME => Ok(RegionAttribute::Mito),
METRIC_ENGINE_NAME => {
let physical = !options.contains_key(LOGICAL_TABLE_METADATA_KEY);
Ok(RegionAttribute::Metric { physical })
}
FILE_ENGINE_NAME => Ok(RegionAttribute::File),
_ => error::UnexpectedSnafu {
violated: format!("Unknown engine: {}", engine),
}
.fail(),
}
}
enum RegionAttribute {
Mito,
Metric { physical: bool },
File,
}
impl RegionAttribute {
fn engine(&self) -> &'static str {
match self {
RegionAttribute::Mito => MITO_ENGINE_NAME,
RegionAttribute::Metric { .. } => METRIC_ENGINE_NAME,
RegionAttribute::File => FILE_ENGINE_NAME,
}
}
}
#[cfg(test)]
mod tests {
@@ -723,7 +778,7 @@ mod tests {
common_telemetry::init_default_ut_logging();
let mut mock_region_server = mock_region_server();
let (engine, _receiver) = MockRegionEngine::new();
let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
let engine_name = engine.name();
mock_region_server.register_engine(engine.clone());
@@ -781,7 +836,7 @@ mod tests {
common_telemetry::init_default_ut_logging();
let mut mock_region_server = mock_region_server();
let (engine, _receiver) = MockRegionEngine::new();
let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
mock_region_server.register_engine(engine.clone());
@@ -832,7 +887,7 @@ mod tests {
common_telemetry::init_default_ut_logging();
let mut mock_region_server = mock_region_server();
let (engine, _receiver) = MockRegionEngine::new();
let (engine, _receiver) = MockRegionEngine::new(MITO_ENGINE_NAME);
mock_region_server.register_engine(engine.clone());
@@ -857,13 +912,15 @@ mod tests {
common_telemetry::init_default_ut_logging();
let mut mock_region_server = mock_region_server();
let (engine, _receiver) =
MockRegionEngine::with_mock_fn(Box::new(|_region_id, _request| {
let (engine, _receiver) = MockRegionEngine::with_mock_fn(
MITO_ENGINE_NAME,
Box::new(|_region_id, _request| {
error::UnexpectedSnafu {
violated: "test".to_string(),
}
.fail()
}));
}),
);
mock_region_server.register_engine(engine.clone());
@@ -904,7 +961,7 @@ mod tests {
common_telemetry::init_default_ut_logging();
let mut mock_region_server = mock_region_server();
let (engine, _) = MockRegionEngine::new();
let (engine, _) = MockRegionEngine::new(MITO_ENGINE_NAME);
mock_region_server.register_engine(engine.clone());
let region_id = RegionId::new(1024, 1);
@@ -950,7 +1007,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: None,
region_change: RegionChange::Register(engine.name().to_string(), false),
region_change: RegionChange::Register(RegionAttribute::Mito),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::Engine(_));
@@ -959,7 +1016,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Registering(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string(), false),
region_change: RegionChange::Register(RegionAttribute::Mito),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::EarlyReturn(_));
@@ -968,7 +1025,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Deregistering(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string(), false),
region_change: RegionChange::Register(RegionAttribute::Mito),
assert: Box::new(|result| {
let err = result.unwrap_err();
assert_eq!(err.status_code(), StatusCode::RegionBusy);
@@ -977,7 +1034,7 @@ mod tests {
CurrentEngineTest {
region_id,
current_region_status: Some(RegionEngineWithStatus::Ready(engine.clone())),
region_change: RegionChange::Register(engine.name().to_string(), false),
region_change: RegionChange::Register(RegionAttribute::Mito),
assert: Box::new(|result| {
let current_engine = result.unwrap();
assert_matches!(current_engine, CurrentEngine::Engine(_));
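A minimal sketch of the attribute resolution introduced above, using only names from these hunks (the items are module-private, so this only type-checks inside the region server module):

use std::collections::HashMap;

// An empty option map has no LOGICAL_TABLE_METADATA_KEY, so the metric region is physical.
let attribute = parse_region_attribute(METRIC_ENGINE_NAME, &HashMap::new()).unwrap();
assert!(matches!(attribute, RegionAttribute::Metric { physical: true }));
assert_eq!(attribute.engine(), METRIC_ENGINE_NAME);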

View File

@@ -106,10 +106,11 @@ pub struct MockRegionEngine {
pub(crate) handle_request_delay: Option<Duration>,
pub(crate) handle_request_mock_fn: Option<MockRequestHandler>,
pub(crate) mock_role: Option<Option<RegionRole>>,
engine: String,
}
impl MockRegionEngine {
pub fn new() -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
pub fn new(engine: &str) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
let (tx, rx) = tokio::sync::mpsc::channel(8);
(
@@ -118,12 +119,14 @@ impl MockRegionEngine {
sender: tx,
handle_request_mock_fn: None,
mock_role: None,
engine: engine.to_string(),
}),
rx,
)
}
pub fn with_mock_fn(
engine: &str,
mock_fn: MockRequestHandler,
) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
let (tx, rx) = tokio::sync::mpsc::channel(8);
@@ -134,12 +137,16 @@ impl MockRegionEngine {
sender: tx,
handle_request_mock_fn: Some(mock_fn),
mock_role: None,
engine: engine.to_string(),
}),
rx,
)
}
pub fn with_custom_apply_fn<F>(apply: F) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>)
pub fn with_custom_apply_fn<F>(
engine: &str,
apply: F,
) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>)
where
F: FnOnce(&mut MockRegionEngine),
{
@@ -149,6 +156,7 @@ impl MockRegionEngine {
sender: tx,
handle_request_mock_fn: None,
mock_role: None,
engine: engine.to_string(),
};
apply(&mut region_engine);
@@ -160,7 +168,7 @@ impl MockRegionEngine {
#[async_trait::async_trait]
impl RegionEngine for MockRegionEngine {
fn name(&self) -> &str {
"mock"
&self.engine
}
async fn handle_request(

View File

@@ -26,7 +26,10 @@ futures = "0.3"
# This fork is simply for keeping our dependency in our org, and pin the version
# it is the same with upstream repo
async-trait.workspace = true
common-function.workspace = true
common-meta.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
enum-as-inner = "0.6.0"
greptime-proto.workspace = true
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }

View File

@@ -14,19 +14,686 @@
//! for getting data from source and sending results to sink
//! and communicating with other parts of the database
#![warn(unused_imports)]
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use std::time::{Instant, SystemTime};
use api::v1::{RowDeleteRequest, RowDeleteRequests, RowInsertRequest, RowInsertRequests};
use catalog::CatalogManagerRef;
use common_base::Plugins;
use common_error::ext::BoxedError;
use common_frontend::handler::FrontendInvoker;
use common_meta::key::TableMetadataManagerRef;
use common_runtime::JoinHandle;
use common_telemetry::{debug, info};
use datatypes::schema::ColumnSchema;
use datatypes::value::Value;
use greptime_proto::v1;
use itertools::Itertools;
use query::{QueryEngine, QueryEngineFactory};
use serde::{Deserialize, Serialize};
use session::context::QueryContext;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ConcreteDataType, RegionId};
use table::metadata::TableId;
use tokio::sync::{oneshot, watch, Mutex, RwLock};
use crate::adapter::error::{ExternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::parse_expr::parse_fixed;
use crate::adapter::table_source::TableSource;
use crate::adapter::util::column_schemas_to_proto;
use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::expr::GlobalId;
use crate::repr::{self, DiffRow, Row};
use crate::transform::{register_function_to_query_engine, sql_to_flow_plan};
pub(crate) mod error;
pub(crate) mod node_context;
mod table_source;
mod flownode_impl;
mod parse_expr;
mod server;
#[cfg(test)]
mod tests;
mod util;
pub(crate) use node_context::FlownodeContext;
pub(crate) use table_source::TableSource;
mod worker;
pub(crate) mod node_context;
mod table_source;
use error::Error;
pub const PER_REQ_MAX_ROW_CNT: usize = 8192;
// TODO: refactor common types for flow to a separate module
/// FlowId is a unique identifier for a flow task
pub type FlowId = u32;
pub type FlowId = u64;
pub type TableName = [String; 3];
/// Options for flow node
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct FlownodeOptions {
/// rpc address
pub rpc_addr: String,
}
/// Flownode Builder
pub struct FlownodeBuilder {
flow_node_id: u32,
opts: FlownodeOptions,
plugins: Plugins,
table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef,
}
impl FlownodeBuilder {
/// init flownode builder
pub fn new(
flow_node_id: u32,
opts: FlownodeOptions,
plugins: Plugins,
table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef,
) -> Self {
Self {
flow_node_id,
opts,
plugins,
table_meta,
catalog_manager,
}
}
/// TODO(discord9): error handling
pub async fn build(self) -> FlownodeManager {
let query_engine_factory = QueryEngineFactory::new_with_plugins(
// query engine in flownode only translate plan with resolved table source.
self.catalog_manager.clone(),
None,
None,
None,
false,
self.plugins.clone(),
);
let query_engine = query_engine_factory.query_engine();
register_function_to_query_engine(&query_engine);
let (tx, rx) = oneshot::channel();
let node_id = Some(self.flow_node_id);
let _handle = std::thread::spawn(move || {
let (flow_node_manager, mut worker) =
FlownodeManager::new_with_worker(node_id, query_engine, self.table_meta.clone());
let _ = tx.send(flow_node_manager);
info!("Flow Worker started in new thread");
worker.run();
});
let man = rx.await.unwrap();
info!("Flow Node Manager started");
man
}
}
/// Arc-ed FlowNodeManager, cheaper to clone
pub type FlownodeManagerRef = Arc<FlownodeManager>;
/// FlowNodeManager manages the state of all tasks in the flow node, which should be run on the same thread
///
/// The choice of timestamp is just using current system timestamp for now
pub struct FlownodeManager {
/// The handler to the worker that will run the dataflow
/// which is `!Send` so a handle is used
pub worker_handles: Vec<Mutex<WorkerHandle>>,
/// The query engine that will be used to parse the query and convert it to a dataflow plan
query_engine: Arc<dyn QueryEngine>,
/// Getting table name and table schema from table info manager
table_info_source: TableSource,
frontend_invoker: RwLock<Option<Box<dyn FrontendInvoker + Send + Sync>>>,
/// contains mapping from table name to global id, and table schema
node_context: Mutex<FlownodeContext>,
flow_err_collectors: RwLock<BTreeMap<FlowId, ErrCollector>>,
src_send_buf_lens: RwLock<BTreeMap<TableId, watch::Receiver<usize>>>,
tick_manager: FlowTickManager,
node_id: Option<u32>,
}
/// Building FlownodeManager
impl FlownodeManager {
/// set frontend invoker
pub async fn set_frontend_invoker(
self: &Arc<Self>,
frontend: Box<dyn FrontendInvoker + Send + Sync>,
) {
*self.frontend_invoker.write().await = Some(frontend);
}
/// Create **without** setting `frontend_invoker`
pub fn new(
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
) -> Self {
let srv_map = TableSource::new(
table_meta.table_info_manager().clone(),
table_meta.table_name_manager().clone(),
);
let node_context = FlownodeContext::default();
let tick_manager = FlowTickManager::new();
let worker_handles = Vec::new();
FlownodeManager {
worker_handles,
query_engine,
table_info_source: srv_map,
frontend_invoker: RwLock::new(None),
node_context: Mutex::new(node_context),
flow_err_collectors: Default::default(),
src_send_buf_lens: Default::default(),
tick_manager,
node_id,
}
}
/// Create a flownode manager with one worker
pub fn new_with_worker<'s>(
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
) -> (Self, Worker<'s>) {
let mut zelf = Self::new(node_id, query_engine, table_meta);
let (handle, worker) = create_worker();
zelf.add_worker_handle(handle);
(zelf, worker)
}
/// add a worker handle to the manager, meaning the corresponding worker is under its management
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
self.worker_handles.push(Mutex::new(handle));
}
}
#[derive(Debug)]
pub enum DiffRequest {
Insert(Vec<(Row, repr::Timestamp)>),
Delete(Vec<(Row, repr::Timestamp)>),
}
/// Iterate through the diff rows and group consecutive rows with the same diff type into one request
pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
let mut reqs = Vec::new();
for (row, ts, diff) in rows {
let last = reqs.last_mut();
match (last, diff) {
(Some(DiffRequest::Insert(rows)), 1) => {
rows.push((row, ts));
}
(Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
(Some(DiffRequest::Delete(rows)), -1) => {
rows.push((row, ts));
}
(Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
(None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])),
(None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])),
_ => {}
}
}
reqs
}
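// Hypothetical illustration (not part of the original change): consecutive rows with the
// same diff sign are merged into one request, and a sign change starts a new one.
#[test]
fn test_diff_row_to_request_grouping() {
let rows: Vec<DiffRow> = vec![
(Row::new(vec![Value::from(1i64)]), 0, 1),
(Row::new(vec![Value::from(2i64)]), 0, 1),
(Row::new(vec![Value::from(3i64)]), 0, -1),
];
let reqs = diff_row_to_request(rows);
assert_eq!(reqs.len(), 2);
assert!(matches!(&reqs[0], DiffRequest::Insert(rows) if rows.len() == 2));
assert!(matches!(&reqs[1], DiffRequest::Delete(rows) if rows.len() == 1));
}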
/// This impl block contains methods to send writeback requests to frontend
impl FlownodeManager {
/// TODO(discord9): merge all diff rows of the same type into one request
///
/// Return the number of requests it made
pub async fn send_writeback_requests(&self) -> Result<usize, Error> {
let all_reqs = self.generate_writeback_request().await;
if all_reqs.is_empty() || all_reqs.iter().all(|v| v.1.is_empty()) {
return Ok(0);
}
let mut req_cnt = 0;
for (table_name, reqs) in all_reqs {
if reqs.is_empty() {
continue;
}
let (catalog, schema) = (table_name[0].clone(), table_name[1].clone());
let ctx = Arc::new(QueryContext::with(&catalog, &schema));
// TODO(discord9): instead of auto-building the table from the request schema, actually build the table
// before `create flow` so that pk, ts, etc. can be assigned.
let (primary_keys, schema, is_auto_create) = if let Some(table_id) = self
.table_info_source
.get_table_id_from_name(&table_name)
.await?
{
let table_info = self
.table_info_source
.get_table_info_value(&table_id)
.await?
.unwrap();
let meta = table_info.table_info.meta;
let primary_keys = meta
.primary_key_indices
.into_iter()
.map(|i| meta.schema.column_schemas[i].name.clone())
.collect_vec();
let schema = meta.schema.column_schemas;
let is_auto_create = schema
.last()
.map(|s| s.name == "__ts_placeholder")
.unwrap_or(false);
(primary_keys, schema, is_auto_create)
} else {
// TODO(discord9): consider removing the buggy auto-create-by-schema path
let node_ctx = self.node_context.lock().await;
let gid: GlobalId = node_ctx
.table_repr
.get_by_name(&table_name)
.map(|x| x.1)
.unwrap();
let schema = node_ctx
.schema
.get(&gid)
.with_context(|| TableNotFoundSnafu {
name: format!("Table name = {:?}", table_name),
})?
.clone();
// TODO(discord9): use default key from schema
let primary_keys = schema
.keys
.first()
.map(|v| {
v.column_indices
.iter()
.map(|i| format!("Col_{i}"))
.collect_vec()
})
.unwrap_or_default();
let update_at = ColumnSchema::new(
"update_at",
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
// TODO(discord9): the time index can't currently be inferred from the flow plan, so we have to manually set a placeholder one
let ts_col = ColumnSchema::new(
"__ts_placeholder",
ConcreteDataType::timestamp_millisecond_datatype(),
true,
)
.with_time_index(true);
let wout_ts = schema
.column_types
.into_iter()
.enumerate()
.map(|(idx, typ)| {
ColumnSchema::new(format!("Col_{idx}"), typ.scalar_type, typ.nullable)
})
.collect_vec();
let mut with_ts = wout_ts.clone();
with_ts.push(update_at);
with_ts.push(ts_col);
(primary_keys, with_ts, true)
};
let proto_schema = column_schemas_to_proto(schema, &primary_keys)?;
debug!(
"Sending {} writeback requests to table {}, reqs={:?}",
reqs.len(),
table_name.join("."),
reqs
);
let now = SystemTime::now();
let now = now
.duration_since(SystemTime::UNIX_EPOCH)
.map(|s| s.as_millis() as repr::Timestamp)
.unwrap_or_else(|_| {
-(SystemTime::UNIX_EPOCH
.duration_since(now)
.unwrap()
.as_millis() as repr::Timestamp)
});
for req in reqs {
match req {
DiffRequest::Insert(insert) => {
let rows_proto: Vec<v1::Row> = insert
.into_iter()
.map(|(mut row, _ts)| {
// `update_at` col
row.extend([Value::from(common_time::Timestamp::new_millisecond(
now,
))]);
// ts col, if auto create
if is_auto_create {
row.extend([Value::from(
common_time::Timestamp::new_millisecond(0),
)]);
}
row.into()
})
.collect::<Vec<_>>();
let table_name = table_name.last().unwrap().clone();
let req = RowInsertRequest {
table_name,
rows: Some(v1::Rows {
schema: proto_schema.clone(),
rows: rows_proto,
}),
};
req_cnt += 1;
self.frontend_invoker
.read()
.await
.as_ref()
.with_context(|| UnexpectedSnafu {
reason: "Expect a frontend invoker for flownode to write back",
})?
.row_inserts(RowInsertRequests { inserts: vec![req] }, ctx.clone())
.await
.map_err(BoxedError::new)
.with_context(|_| ExternalSnafu {})?;
}
DiffRequest::Delete(remove) => {
info!("original remove rows={:?}", remove);
let rows_proto: Vec<v1::Row> = remove
.into_iter()
.map(|(mut row, _ts)| {
row.extend(Some(Value::from(
common_time::Timestamp::new_millisecond(0),
)));
row.into()
})
.collect::<Vec<_>>();
let table_name = table_name.last().unwrap().clone();
let req = RowDeleteRequest {
table_name,
rows: Some(v1::Rows {
schema: proto_schema.clone(),
rows: rows_proto,
}),
};
req_cnt += 1;
self.frontend_invoker
.read()
.await
.as_ref()
.with_context(|| UnexpectedSnafu {
reason: "Expect a frontend invoker for flownode to write back",
})?
.row_deletes(RowDeleteRequests { deletes: vec![req] }, ctx.clone())
.await
.map_err(BoxedError::new)
.with_context(|_| ExternalSnafu {})?;
}
}
}
}
Ok(req_cnt)
}
/// Generate writeback requests for all sink tables
pub async fn generate_writeback_request(&self) -> BTreeMap<TableName, Vec<DiffRequest>> {
let mut output = BTreeMap::new();
for (name, sink_recv) in self
.node_context
.lock()
.await
.sink_receiver
.iter_mut()
.map(|(n, (_s, r))| (n, r))
{
let mut rows = Vec::new();
while let Ok(row) = sink_recv.try_recv() {
rows.push(row);
}
let reqs = diff_row_to_request(rows);
output.insert(name.clone(), reqs);
}
output
}
}
/// Flow Runtime related methods
impl FlownodeManager {
/// run in common_runtime background runtime
pub fn run_background(self: Arc<Self>) -> JoinHandle<()> {
info!("Starting flownode manager's background task");
common_runtime::spawn_bg(async move {
self.run().await;
})
}
/// log all flow errors
pub async fn log_all_errors(&self) {
for (f_id, f_err) in self.flow_err_collectors.read().await.iter() {
let all_errors = f_err.get_all().await;
if !all_errors.is_empty() {
let all_errors = all_errors
.into_iter()
.map(|i| format!("{:?}", i))
.join("\n");
common_telemetry::error!("Flow {} has following errors: {}", f_id, all_errors);
}
}
}
/// Trigger dataflow running, and then send writeback requests via the frontend invoker
///
/// note that this method doesn't handle input mirror requests, as those should be handled by the grpc server
pub async fn run(&self) {
debug!("Starting to run");
loop {
// TODO(discord9): only run when new inputs arrive or scheduled to
self.run_available().await.unwrap();
// TODO(discord9): error handling
self.send_writeback_requests().await.unwrap();
self.log_all_errors().await;
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
}
/// Run all available subgraphs in the flow node
/// This will try to run all dataflows in this node
///
/// However this is not blocking and can sometimes return while the actual computation is still running in the worker thread
/// TODO(discord9): add a flag for subgraphs that have received input since the last run
pub async fn run_available(&self) -> Result<(), Error> {
let now = self.tick_manager.tick();
loop {
for worker in self.worker_handles.iter() {
// TODO(discord9): consider how to handle error in individual worker
worker.lock().await.run_available(now).await.unwrap();
}
// first check how many inputs were sent
let send_cnt = match self.node_context.lock().await.flush_all_sender() {
Ok(cnt) => cnt,
Err(err) => {
common_telemetry::error!("Flush send buf errors: {:?}", err);
break;
}
};
// if no inputs
if send_cnt == 0 {
break;
} else {
debug!("FlownodeManager::run_available: send_cnt={}", send_cnt);
}
}
Ok(())
}
/// send write request to related source sender
pub async fn handle_write_request(
&self,
region_id: RegionId,
rows: Vec<DiffRow>,
) -> Result<(), Error> {
debug!(
"Handling write request for region_id={:?} with {} rows",
region_id,
rows.len()
);
let table_id = region_id.table_id();
self.node_context.lock().await.send(table_id, rows)?;
Ok(())
}
}
/// Create&Remove flow
impl FlownodeManager {
/// remove a flow by its id
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
for handle in self.worker_handles.iter() {
let handle = handle.lock().await;
if handle.contains_flow(flow_id).await? {
handle.remove_flow(flow_id).await?;
break;
}
}
Ok(())
}
/// Return the task id if a new task is created, otherwise return None
///
/// Steps to create a task:
/// 1. parse the query into a typed plan (and optionally parse the `expire_when` expr)
/// 2. render source/sink with the output table id and the input table ids used
#[allow(clippy::too_many_arguments)]
pub async fn create_flow(
&self,
flow_id: FlowId,
sink_table_name: TableName,
source_table_ids: &[TableId],
create_if_not_exist: bool,
expire_when: Option<String>,
comment: Option<String>,
sql: String,
flow_options: HashMap<String, String>,
query_ctx: Option<QueryContext>,
) -> Result<Option<FlowId>, Error> {
if create_if_not_exist {
// check if the task already exists
for handle in self.worker_handles.iter() {
if handle.lock().await.contains_flow(flow_id).await? {
return Ok(None);
}
}
}
let mut node_ctx = self.node_context.lock().await;
// assign global id to source and sink table
for source in source_table_ids {
node_ctx
.assign_global_id_to_table(&self.table_info_source, None, Some(*source))
.await?;
}
node_ctx
.assign_global_id_to_table(&self.table_info_source, Some(sink_table_name.clone()), None)
.await?;
node_ctx.register_task_src_sink(flow_id, source_table_ids, sink_table_name.clone());
node_ctx.query_context = query_ctx.map(Arc::new);
// construct an active dataflow state with it
let flow_plan = sql_to_flow_plan(&mut node_ctx, &self.query_engine, &sql).await?;
debug!("Flow {:?}'s Plan is {:?}", flow_id, flow_plan);
node_ctx.assign_table_schema(&sink_table_name, flow_plan.typ.clone())?;
let expire_when = expire_when
.and_then(|s| {
if s.is_empty() || s.split_whitespace().join("").is_empty() {
None
} else {
Some(s)
}
})
.map(|d| {
let d = d.as_ref();
parse_fixed(d)
.map(|(_, n)| n)
.map_err(|err| err.to_string())
})
.transpose()
.map_err(|err| UnexpectedSnafu { reason: err }.build())?;
let _ = comment;
let _ = flow_options;
// TODO(discord9): support more than one worker handle
let sink_id = node_ctx.table_repr.get_by_name(&sink_table_name).unwrap().1;
let sink_sender = node_ctx.get_sink_by_global_id(&sink_id)?;
let source_ids = source_table_ids
.iter()
.map(|id| node_ctx.table_repr.get_by_table_id(id).unwrap().1)
.collect_vec();
let source_receivers = source_ids
.iter()
.map(|id| {
node_ctx
.get_source_by_global_id(id)
.map(|s| s.get_receiver())
})
.collect::<Result<Vec<_>, _>>()?;
let err_collector = ErrCollector::default();
self.flow_err_collectors
.write()
.await
.insert(flow_id, err_collector.clone());
let handle = &self.worker_handles[0].lock().await;
let create_request = worker::Request::Create {
flow_id,
plan: flow_plan,
sink_id,
sink_sender,
source_ids,
src_recvs: source_receivers,
expire_when,
create_if_not_exist,
err_collector,
};
handle.create_flow(create_request).await?;
info!("Successfully create flow with id={}", flow_id);
Ok(Some(flow_id))
}
}
/// FlowTickManager is a manager for the flow tick, which tracks flow execution progress
///
/// TODO(discord9): find a better way to do this, and avoid exposing the flow tick even to other flows, to avoid
/// TSO coordination mess
#[derive(Clone)]
pub struct FlowTickManager {
start: Instant,
}
impl std::fmt::Debug for FlowTickManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FlowTickManager").finish()
}
}
impl FlowTickManager {
pub fn new() -> Self {
FlowTickManager {
start: Instant::now(),
}
}
/// Return the current timestamp in milliseconds, measured since the manager started
///
/// TODO(discord9): reconsider, since `tick()` requires a monotonic clock and also needs to survive recovery later
pub fn tick(&self) -> repr::Timestamp {
let current = Instant::now();
let since_the_epoch = current - self.start;
since_the_epoch.as_millis() as repr::Timestamp
}
}
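// Minimal sketch (not part of the original change): `tick()` is derived from a monotonic
// `Instant`, so successive calls never go backwards.
#[test]
fn test_tick_is_monotonic() {
let tm = FlowTickManager::new();
let (t0, t1) = (tm.tick(), tm.tick());
assert!(t0 <= t1);
}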

View File

@@ -0,0 +1,117 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! impl the `Flownode` trait for `FlownodeManager` so standalone mode can call it
use api::v1::flow::{flow_request, CreateRequest, DropRequest, FlowRequest, FlowResponse};
use api::v1::region::InsertRequests;
use common_error::ext::BoxedError;
use common_meta::error::{ExternalSnafu, Result, UnexpectedSnafu};
use common_meta::node_manager::Flownode;
use itertools::Itertools;
use snafu::ResultExt;
use crate::adapter::FlownodeManager;
use crate::repr::{self, DiffRow};
fn to_meta_err(err: crate::adapter::error::Error) -> common_meta::error::Error {
// TODO(discord9): refactor this
Err::<(), _>(BoxedError::new(err))
.with_context(|_| ExternalSnafu)
.unwrap_err()
}
#[async_trait::async_trait]
impl Flownode for FlownodeManager {
async fn handle(&self, request: FlowRequest) -> Result<FlowResponse> {
let query_ctx = request
.header
.and_then(|h| h.query_context)
.map(|ctx| ctx.into());
match request.body {
Some(flow_request::Body::Create(CreateRequest {
flow_id: Some(task_id),
source_table_ids,
sink_table_name: Some(sink_table_name),
create_if_not_exists,
expire_when,
comment,
sql,
flow_options,
})) => {
let source_table_ids = source_table_ids.into_iter().map(|id| id.id).collect_vec();
let sink_table_name = [
sink_table_name.catalog_name,
sink_table_name.schema_name,
sink_table_name.table_name,
];
let ret = self
.create_flow(
task_id.id as u64,
sink_table_name,
&source_table_ids,
create_if_not_exists,
Some(expire_when),
Some(comment),
sql,
flow_options,
query_ctx,
)
.await
.map_err(to_meta_err)?;
Ok(FlowResponse {
affected_flows: ret
.map(|id| greptime_proto::v1::FlowId { id: id as u32 })
.into_iter()
.collect_vec(),
..Default::default()
})
}
Some(flow_request::Body::Drop(DropRequest {
flow_id: Some(flow_id),
})) => {
self.remove_flow(flow_id.id as u64)
.await
.map_err(to_meta_err)?;
Ok(Default::default())
}
None => UnexpectedSnafu {
err_msg: "Missing request body",
}
.fail(),
_ => UnexpectedSnafu {
err_msg: "Invalid request body.",
}
.fail(),
}
}
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
for write_request in request.requests {
let region_id = write_request.region_id;
let rows_proto = write_request.rows.map(|r| r.rows).unwrap_or(vec![]);
// TODO(discord9): reconsider time assignment mechanism
let now = self.tick_manager.tick();
let rows: Vec<DiffRow> = rows_proto
.into_iter()
.map(repr::Row::from)
.map(|r| (r, now, 1))
.collect_vec();
self.handle_write_request(region_id.into(), rows)
.await
.map_err(to_meta_err)?;
}
Ok(Default::default())
}
}

View File

@@ -30,7 +30,7 @@ use crate::expr::GlobalId;
use crate::repr::{DiffRow, RelationType, BROADCAST_CAP};
/// A context that holds the information of the dataflow
#[derive(Default)]
#[derive(Default, Debug)]
pub struct FlownodeContext {
/// mapping from source table to tasks, useful for scheduling which task to run when a source table is updated
pub source_to_tasks: BTreeMap<TableId, BTreeSet<FlowId>>,
@@ -64,6 +64,7 @@ pub struct FlownodeContext {
///
/// the receiver still uses a tokio broadcast channel, since only the sender side needs to know about
/// backpressure and adjust the dataflow running duration to avoid blocking
#[derive(Debug)]
pub struct SourceSender {
sender: broadcast::Sender<DiffRow>,
send_buf: VecDeque<DiffRow>,

View File

@@ -0,0 +1,245 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! parse expr like "ts <= now() - interval '5 m'"
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::{alphanumeric1, digit0, multispace0};
use nom::combinator::peek;
use nom::sequence::tuple;
use nom::IResult;
use crate::repr;
#[test]
fn test_parse_duration() {
let input = "1 h 5 m 42 second";
let (remain, ttl) = parse_duration(input).unwrap();
assert_eq!(remain, "");
assert_eq!(ttl, (3600 + 5 * 60 + 42) * 1000);
}
#[test]
fn test_parse_fixed() {
let input = "timestamp < now() - INTERVAL '5m 42s'";
let (remain, ttl) = parse_fixed(input).unwrap();
assert_eq!(remain, "");
assert_eq!(ttl, (5 * 60 + 42) * 1000);
}
pub fn parse_fixed(input: &str) -> IResult<&str, i64> {
let (r, _) = tuple((
multispace0,
tag_no_case("timestamp"),
multispace0,
tag("<"),
multispace0,
tag_no_case("now()"),
multispace0,
tag("-"),
multispace0,
tag_no_case("interval"),
multispace0,
))(input)?;
tuple((tag("'"), parse_duration, tag("'")))(r).map(|(r, (_, ttl, _))| (r, ttl))
}
/// Parse a duration and return the TTL in milliseconds; currently only the time part of the PostgreSQL interval type is supported
pub fn parse_duration(input: &str) -> IResult<&str, i64> {
let mut intervals = vec![];
let mut remain = input;
while peek(parse_quality)(remain).is_ok() {
let (r, number) = parse_quality(remain)?;
let (r, unit) = parse_time_unit(r)?;
intervals.push((number, unit));
remain = r;
}
let mut total = 0;
for (number, unit) in intervals {
let number = match unit {
TimeUnit::Second => number,
TimeUnit::Minute => number * 60,
TimeUnit::Hour => number * 60 * 60,
};
total += number;
}
total *= 1000;
Ok((remain, total))
}
enum Expr {
Col(String),
Now,
Duration(repr::Duration),
Binary {
left: Box<Expr>,
op: String,
right: Box<Expr>,
},
}
fn parse_expr(input: &str) -> IResult<&str, Expr> {
parse_expr_bp(input, 0)
}
/// a simple Pratt parser
fn parse_expr_bp(input: &str, min_bp: u8) -> IResult<&str, Expr> {
let (mut input, mut lhs): (&str, Expr) = parse_item(input)?;
loop {
let (r, op) = parse_op(input)?;
let (_, (l_bp, r_bp)) = infix_binding_power(op)?;
if l_bp < min_bp {
return Ok((input, lhs));
}
let (r, rhs) = parse_expr_bp(r, r_bp)?;
input = r;
lhs = Expr::Binary {
left: Box::new(lhs),
op: op.to_string(),
right: Box::new(rhs),
};
}
}
fn parse_op(input: &str) -> IResult<&str, &str> {
alt((parse_add_sub, parse_cmp))(input)
}
fn parse_item(input: &str) -> IResult<&str, Expr> {
if let Ok((r, name)) = parse_col_name(input) {
Ok((r, Expr::Col(name.to_string())))
} else if let Ok((r, _now)) = parse_now(input) {
Ok((r, Expr::Now))
} else if let Ok((_r, _num)) = parse_quality(input) {
todo!()
} else {
todo!()
}
}
fn infix_binding_power(op: &str) -> IResult<&str, (u8, u8)> {
let ret = match op {
"<" | ">" | "<=" | ">=" => (1, 2),
"+" | "-" => (3, 4),
_ => {
return Err(nom::Err::Error(nom::error::Error::new(
op,
nom::error::ErrorKind::Fail,
)))
}
};
Ok((op, ret))
}
fn parse_col_name(input: &str) -> IResult<&str, &str> {
tuple((multispace0, alphanumeric1, multispace0))(input).map(|(r, (_, name, _))| (r, name))
}
fn parse_now(input: &str) -> IResult<&str, &str> {
tag_no_case("now()")(input)
}
fn parse_add_sub(input: &str) -> IResult<&str, &str> {
tuple((multispace0, alt((tag("+"), tag("-"))), multispace0))(input)
.map(|(r, (_, op, _))| (r, op))
}
fn parse_cmp(input: &str) -> IResult<&str, &str> {
tuple((
multispace0,
alt((tag("<="), tag(">="), tag("<"), tag(">"))),
multispace0,
))(input)
.map(|(r, (_, op, _))| (r, op))
}
/// parse a number with optional sign
fn parse_quality(input: &str) -> IResult<&str, repr::Duration> {
tuple((
multispace0,
alt((tag("+"), tag("-"), tag(""))),
digit0,
multispace0,
))(input)
.map(|(r, (_, sign, name, _))| (r, sign, name))
.and_then(|(r, sign, name)| {
let num = name.parse::<repr::Duration>().map_err(|_| {
nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
})?;
let num = match sign {
"+" => num,
"-" => -num,
_ => num,
};
Ok((r, num))
})
}
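// Illustrative check (not part of the original change): `parse_quality` accepts an optional
// sign before the digits.
#[test]
fn test_parse_quality_sign() {
assert_eq!(parse_quality("-5 ").unwrap().1, -5);
assert_eq!(parse_quality("5 ").unwrap().1, 5);
}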
#[derive(Debug, Clone)]
enum TimeUnit {
Second,
Minute,
Hour,
}
#[derive(Debug, Clone)]
enum DateUnit {
Day,
Month,
Year,
}
fn parse_time_unit(input: &str) -> IResult<&str, TimeUnit> {
fn to_second(input: &str) -> IResult<&str, TimeUnit> {
alt((
tag_no_case("second"),
tag_no_case("seconds"),
tag_no_case("S"),
))(input)
.map(move |(r, _)| (r, TimeUnit::Second))
}
fn to_minute(input: &str) -> IResult<&str, TimeUnit> {
alt((
tag_no_case("minute"),
tag_no_case("minutes"),
tag_no_case("m"),
))(input)
.map(move |(r, _)| (r, TimeUnit::Minute))
}
fn to_hour(input: &str) -> IResult<&str, TimeUnit> {
alt((tag_no_case("hour"), tag_no_case("hours"), tag_no_case("h")))(input)
.map(move |(r, _)| (r, TimeUnit::Hour))
}
tuple((
multispace0,
alt((
to_second, to_minute,
to_hour, /*
tag_no_case("day"),
tag_no_case("days"),
tag_no_case("d"),
tag_no_case("month"),
tag_no_case("months"),
tag_no_case("m"),
tag_no_case("year"),
tag_no_case("years"),
tag_no_case("y"),
*/
)),
multispace0,
))(input)
.map(|(r, (_, unit, _))| (r, unit))
}

View File

@@ -0,0 +1,147 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of grpc service for flow node
use std::net::SocketAddr;
use common_meta::node_manager::Flownode;
use common_telemetry::tracing::info;
use futures::FutureExt;
use greptime_proto::v1::flow::{flow_server, FlowRequest, FlowResponse, InsertRequests};
use itertools::Itertools;
use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu};
use snafu::{ensure, ResultExt};
use tokio::net::TcpListener;
use tokio::sync::{oneshot, Mutex};
use tonic::transport::server::TcpIncoming;
use tonic::{Request, Response, Status};
use crate::adapter::FlownodeManagerRef;
pub const FLOW_NODE_SERVER_NAME: &str = "FLOW_NODE_SERVER";
/// Wraps the flow node manager so external traits can be implemented for it (avoiding the orphan rule on `Arc<...>`)
#[derive(Clone)]
pub struct FlowService {
pub manager: FlownodeManagerRef,
}
#[async_trait::async_trait]
impl flow_server::Flow for FlowService {
async fn handle_create_remove(
&self,
request: Request<FlowRequest>,
) -> Result<Response<FlowResponse>, Status> {
let request = request.into_inner();
self.manager
.handle(request)
.await
.map(Response::new)
.map_err(|e| {
let msg = format!("failed to handle request: {:?}", e);
Status::internal(msg)
})
}
async fn handle_mirror_request(
&self,
request: Request<InsertRequests>,
) -> Result<Response<FlowResponse>, Status> {
let request = request.into_inner();
// TODO(discord9): fix protobuf import order shenanigans to remove this duplicated define
let request = api::v1::region::InsertRequests {
requests: request
.requests
.into_iter()
.map(|insert| api::v1::region::InsertRequest {
region_id: insert.region_id,
rows: insert.rows,
})
.collect_vec(),
};
self.manager
.handle_inserts(request)
.await
.map(Response::new)
.map_err(|e| {
let msg = format!("failed to handle request: {:?}", e);
Status::internal(msg)
})
}
}
pub struct FlownodeServer {
pub shutdown_tx: Mutex<Option<oneshot::Sender<()>>>,
pub flow_service: FlowService,
}
impl FlownodeServer {
pub fn create_flow_service(&self) -> flow_server::FlowServer<impl flow_server::Flow> {
flow_server::FlowServer::new(self.flow_service.clone())
}
}
#[async_trait::async_trait]
impl servers::server::Server for FlownodeServer {
async fn shutdown(&self) -> Result<(), servers::error::Error> {
let mut shutdown_tx = self.shutdown_tx.lock().await;
if let Some(tx) = shutdown_tx.take() {
if tx.send(()).is_err() {
info!("Receiver dropped, the flow node server has already shutdown");
}
}
info!("Shutdown flow node server");
Ok(())
}
async fn start(&self, addr: SocketAddr) -> Result<SocketAddr, servers::error::Error> {
let (tx, rx) = oneshot::channel::<()>();
let (incoming, addr) = {
let mut shutdown_tx = self.shutdown_tx.lock().await;
ensure!(
shutdown_tx.is_none(),
AlreadyStartedSnafu { server: "flow" }
);
let listener = TcpListener::bind(addr)
.await
.context(TcpBindSnafu { addr })?;
let addr = listener.local_addr().context(TcpBindSnafu { addr })?;
let incoming =
TcpIncoming::from_listener(listener, true, None).context(TcpIncomingSnafu)?;
info!("flow server is bound to {}", addr);
*shutdown_tx = Some(tx);
(incoming, addr)
};
let builder = tonic::transport::Server::builder().add_service(self.create_flow_service());
let _handle = common_runtime::spawn_bg(async move {
let _result = builder
.serve_with_incoming_shutdown(incoming, rx.map(drop))
.await
.context(StartGrpcSnafu);
});
// TODO(discord9): better place for dataflow to run per second
let manager_ref = self.flow_service.manager.clone();
let _handle = manager_ref.clone().run_background();
Ok(addr)
}
fn name(&self) -> &str {
FLOW_NODE_SERVER_NAME
}
}

View File

@@ -0,0 +1,64 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Mock test for adapter module
//! TODO(discord9): write mock test
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use store_api::storage::ConcreteDataType;
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder};
use super::*;
pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
table_id: TableId,
table_name: &str,
region_numbers: I,
) -> TableInfo {
let column_schemas = vec![
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true),
];
let schema = SchemaBuilder::try_from(column_schemas)
.unwrap()
.version(123)
.build()
.unwrap();
let meta = TableMetaBuilder::default()
.schema(Arc::new(schema))
.primary_key_indices(vec![0])
.engine("engine")
.next_column_id(3)
.region_numbers(region_numbers.into_iter().collect::<Vec<_>>())
.build()
.unwrap();
TableInfoBuilder::default()
.table_id(table_id)
.table_version(5)
.name(table_name)
.meta(meta)
.build()
.unwrap()
}
/// Create a mock harness for the flow node manager
///
/// containing several default table infos and schemas
fn mock_harness_flow_node_manager() {}

View File

@@ -15,15 +15,16 @@
//! For single-thread flow worker
use std::collections::{BTreeMap, VecDeque};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use common_telemetry::info;
use enum_as_inner::EnumAsInner;
use hydroflow::scheduled::graph::Hydroflow;
use snafu::{ensure, OptionExt};
use tokio::sync::{broadcast, mpsc, Mutex};
use crate::adapter::error::{Error, FlowAlreadyExistSnafu, InternalSnafu};
use crate::adapter::error::{Error, FlowAlreadyExistSnafu, InternalSnafu, UnexpectedSnafu};
use crate::adapter::FlowId;
use crate::compute::{Context, DataflowState, ErrCollector};
use crate::expr::GlobalId;
@@ -39,6 +40,7 @@ pub fn create_worker<'a>() -> (WorkerHandle, Worker<'a>) {
let (itc_client, itc_server) = create_inter_thread_call();
let worker_handle = WorkerHandle {
itc_client: Mutex::new(itc_client),
shutdown: AtomicBool::new(false),
};
let worker = Worker {
task_states: BTreeMap::new(),
@@ -105,6 +107,7 @@ impl<'subgraph> ActiveDataflowState<'subgraph> {
#[derive(Debug)]
pub struct WorkerHandle {
itc_client: Mutex<InterThreadCallClient>,
shutdown: AtomicBool,
}
impl WorkerHandle {
@@ -123,7 +126,7 @@ impl WorkerHandle {
.itc_client
.lock()
.await
.call_blocking(create_reqs)
.call_with_resp(create_reqs)
.await?;
ret.into_create().map_err(|ret| {
InternalSnafu {
@@ -138,7 +141,7 @@ impl WorkerHandle {
/// remove task, return task id
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<bool, Error> {
let req = Request::Remove { flow_id };
let ret = self.itc_client.lock().await.call_blocking(req).await?;
let ret = self.itc_client.lock().await.call_with_resp(req).await?;
ret.into_remove().map_err(|ret| {
InternalSnafu {
@@ -157,13 +160,12 @@ impl WorkerHandle {
self.itc_client
.lock()
.await
.call_non_blocking(Request::RunAvail { now })
.await
.call_no_resp(Request::RunAvail { now })
}
pub async fn contains_flow(&self, flow_id: FlowId) -> Result<bool, Error> {
let req = Request::ContainTask { flow_id };
let ret = self.itc_client.lock().await.call_blocking(req).await?;
let ret = self.itc_client.lock().await.call_with_resp(req).await?;
ret.into_contain_task().map_err(|ret| {
InternalSnafu {
@@ -177,11 +179,37 @@ impl WorkerHandle {
/// shutdown the worker
pub async fn shutdown(&self) -> Result<(), Error> {
self.itc_client
.lock()
.await
.call_non_blocking(Request::Shutdown)
.await
if !self.shutdown.fetch_or(true, Ordering::SeqCst) {
self.itc_client.lock().await.call_no_resp(Request::Shutdown)
} else {
UnexpectedSnafu {
reason: "Worker already shutdown",
}
.fail()
}
}
/// shutdown the worker
pub fn shutdown_blocking(&self) -> Result<(), Error> {
if !self.shutdown.fetch_or(true, Ordering::SeqCst) {
self.itc_client
.blocking_lock()
.call_no_resp(Request::Shutdown)
} else {
UnexpectedSnafu {
reason: "Worker already shutdown",
}
.fail()
}
}
}
impl Drop for WorkerHandle {
fn drop(&mut self) {
if let Err(err) = self.shutdown_blocking() {
common_telemetry::error!("Fail to shutdown worker: {:?}", err)
}
info!("Flow Worker shutdown due to Worker Handle dropped.")
}
}
@@ -208,7 +236,6 @@ impl<'s> Worker<'s> {
create_if_not_exist: bool,
err_collector: ErrCollector,
) -> Result<Option<FlowId>, Error> {
let _ = expire_when;
let already_exist = self.task_states.contains_key(&flow_id);
match (already_exist, create_if_not_exist) {
(true, true) => return Ok(None),
@@ -220,6 +247,7 @@ impl<'s> Worker<'s> {
err_collector,
..Default::default()
};
cur_task_state.state.set_expire_after(expire_when);
{
let mut ctx = cur_task_state.new_ctx(sink_id);
@@ -395,7 +423,7 @@ struct InterThreadCallClient {
impl InterThreadCallClient {
/// call without expecting responses or blocking
async fn call_non_blocking(&self, req: Request) -> Result<(), Error> {
fn call_no_resp(&self, req: Request) -> Result<(), Error> {
// TODO(discord9): relax memory order later
let call_id = self.call_id.fetch_add(1, Ordering::SeqCst);
self.arg_sender
@@ -404,7 +432,7 @@ impl InterThreadCallClient {
}
/// call blocking, and return the result
async fn call_blocking(&mut self, req: Request) -> Result<Response, Error> {
async fn call_with_resp(&mut self, req: Request) -> Result<Response, Error> {
// TODO(discord9): relax memory order later
let call_id = self.call_id.fetch_add(1, Ordering::SeqCst);
self.arg_sender

View File

@@ -111,7 +111,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
input,
key_val_plan,
reduce_plan,
} => self.render_reduce(input, key_val_plan, reduce_plan),
} => self.render_reduce(input, key_val_plan, reduce_plan, plan.typ),
Plan::Join { .. } => NotImplementedSnafu {
reason: "Join is still WIP",
}
@@ -223,11 +223,11 @@ mod test {
use hydroflow::scheduled::graph::Hydroflow;
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::handoff::VecHandoff;
use pretty_assertions::{assert_eq, assert_ne};
use super::*;
use crate::expr::BinaryFunc;
use crate::repr::Row;
pub fn run_and_check(
state: &mut DataflowState,
df: &mut Hydroflow,
@@ -238,6 +238,12 @@ mod test {
for now in time_range {
state.set_current_ts(now);
state.run_available_with_schedule(df);
if !state.get_err_collector().is_empty() {
panic!(
"Errors occur: {:?}",
state.get_err_collector().get_all_blocking()
)
}
assert!(state.get_err_collector().is_empty());
if let Some(expected) = expected.get(&now) {
assert_eq!(*output.borrow(), *expected, "at ts={}", now);

View File

@@ -29,8 +29,8 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector
use crate::expr::error::{DataTypeSnafu, InternalSnafu};
use crate::expr::{AggregateExpr, EvalError, ScalarExpr};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter};
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, KeyExpiryManager};
impl<'referred, 'df> Context<'referred, 'df> {
const REDUCE: &'static str = "reduce";
@@ -42,6 +42,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
input: Box<TypedPlan>,
key_val_plan: KeyValPlan,
reduce_plan: ReducePlan,
output_type: RelationType,
) -> Result<CollectionBundle, Error> {
let input = self.render_plan(*input)?;
// first assembly key&val that's ((Row, Row), tick, diff)
@@ -52,6 +53,15 @@ impl<'referred, 'df> Context<'referred, 'df> {
// TODO(discord9): config global expire time from self
let arrange_handler = self.compute_state.new_arrange(None);
if let (Some(time_index), Some(expire_after)) =
(output_type.time_index, self.compute_state.expire_after())
{
let expire_man =
KeyExpiryManager::new(Some(expire_after), Some(ScalarExpr::Column(time_index)));
arrange_handler.write().set_expire_state(expire_man);
}
// reduce need full arrangement to be able to query all keys
let arrange_handler_inner = arrange_handler.clone_full_arrange().context(PlanSnafu {
reason: "No write is expected at this point",
@@ -729,15 +739,273 @@ mod test {
use std::cell::RefCell;
use std::rc::Rc;
use datatypes::data_type::ConcreteDataType;
use common_time::{DateTime, Interval, Timestamp};
use datatypes::data_type::{ConcreteDataType, ConcreteDataType as CDT};
use hydroflow::scheduled::graph::Hydroflow;
use super::*;
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
use crate::compute::state::DataflowState;
use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject};
use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc};
use crate::repr::{ColumnType, RelationType};
/// SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00')
/// input table columns: number, ts
/// expected: sum(number), window_start, window_end
#[test]
fn test_tumble_group_by() {
let mut df = Hydroflow::new();
let mut state = DataflowState::default();
let mut ctx = harness_test_ctx(&mut df, &mut state);
const START: i64 = 1625097600000;
let rows = vec![
(1u32, START + 1000),
(2u32, START + 1500),
(3u32, START + 2000),
(1u32, START + 2500),
(2u32, START + 3000),
(3u32, START + 3500),
];
let rows = rows
.into_iter()
.map(|(number, ts)| {
(
Row::new(vec![number.into(), Timestamp::new_millisecond(ts).into()]),
1,
1,
)
})
.collect_vec();
let collection = ctx.render_constant(rows.clone());
ctx.insert_global(GlobalId::User(1), collection);
let aggr_expr = AggregateExpr {
func: AggregateFunc::SumUInt32,
expr: ScalarExpr::Column(0),
distinct: false,
};
let expected = TypedPlan {
typ: RelationType::new(vec![
ColumnType::new(CDT::uint64_datatype(), true), // sum(number)
ColumnType::new(CDT::datetime_datatype(), false), // window start
ColumnType::new(CDT::datetime_datatype(), false), // window end
]),
// TODO(discord9): mfp indirectly ref to key columns
/*
.with_key(vec![1])
.with_time_index(Some(0)),*/
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
input: Box::new(
Plan::Get {
id: crate::expr::Id::Global(GlobalId::User(1)),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
ColumnType::new(ConcreteDataType::datetime_datatype(), false),
])),
),
key_val_plan: KeyValPlan {
key_plan: MapFilterProject::new(2)
.map(vec![
ScalarExpr::Column(1).call_unary(
UnaryFunc::TumbleWindowFloor {
window_size: Interval::from_month_day_nano(
0,
0,
1_000_000_000,
),
start_time: Some(DateTime::new(1625097600000)),
},
),
ScalarExpr::Column(1).call_unary(
UnaryFunc::TumbleWindowCeiling {
window_size: Interval::from_month_day_nano(
0,
0,
1_000_000_000,
),
start_time: Some(DateTime::new(1625097600000)),
},
),
])
.unwrap()
.project(vec![2, 3])
.unwrap()
.into_safe(),
val_plan: MapFilterProject::new(2)
.project(vec![0, 1])
.unwrap()
.into_safe(),
},
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: vec![aggr_expr.clone()],
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)],
distinct_aggrs: vec![],
}),
}
.with_types(
RelationType::new(vec![
ColumnType::new(CDT::datetime_datatype(), false), // window start
ColumnType::new(CDT::datetime_datatype(), false), // window end
ColumnType::new(CDT::uint64_datatype(), true), //sum(number)
])
.with_key(vec![1])
.with_time_index(Some(0)),
),
),
mfp: MapFilterProject::new(3)
.map(vec![
ScalarExpr::Column(2),
ScalarExpr::Column(3),
ScalarExpr::Column(0),
ScalarExpr::Column(1),
])
.unwrap()
.project(vec![4, 5, 6])
.unwrap(),
},
};
let bundle = ctx.render_plan(expected).unwrap();
let output = get_output_handle(&mut ctx, bundle);
drop(ctx);
let expected = BTreeMap::from([(
1,
vec![
(
Row::new(vec![
3u64.into(),
Timestamp::new_millisecond(START + 1000).into(),
Timestamp::new_millisecond(START + 2000).into(),
]),
1,
1,
),
(
Row::new(vec![
4u64.into(),
Timestamp::new_millisecond(START + 2000).into(),
Timestamp::new_millisecond(START + 3000).into(),
]),
1,
1,
),
(
Row::new(vec![
5u64.into(),
Timestamp::new_millisecond(START + 3000).into(),
Timestamp::new_millisecond(START + 4000).into(),
]),
1,
1,
),
],
)]);
run_and_check(&mut state, &mut df, 1..2, expected, output);
}
/// select avg(number) from number;
#[test]
fn test_avg_eval() {
let mut df = Hydroflow::new();
let mut state = DataflowState::default();
let mut ctx = harness_test_ctx(&mut df, &mut state);
let rows = vec![
(Row::new(vec![1u32.into()]), 1, 1),
(Row::new(vec![2u32.into()]), 1, 1),
(Row::new(vec![3u32.into()]), 1, 1),
(Row::new(vec![1u32.into()]), 1, 1),
(Row::new(vec![2u32.into()]), 1, 1),
(Row::new(vec![3u32.into()]), 1, 1),
];
let collection = ctx.render_constant(rows.clone());
ctx.insert_global(GlobalId::User(1), collection);
let aggr_exprs = vec![
AggregateExpr {
func: AggregateFunc::SumUInt32,
expr: ScalarExpr::Column(0),
distinct: false,
},
AggregateExpr {
func: AggregateFunc::Count,
expr: ScalarExpr::Column(0),
distinct: false,
},
];
let avg_expr = ScalarExpr::If {
cond: Box::new(ScalarExpr::Column(1).call_binary(
ScalarExpr::Literal(Value::from(0u32), CDT::int64_datatype()),
BinaryFunc::NotEq,
)),
then: Box::new(ScalarExpr::Column(0).call_binary(
ScalarExpr::Column(1).call_unary(UnaryFunc::Cast(CDT::uint64_datatype())),
BinaryFunc::DivUInt64,
)),
els: Box::new(ScalarExpr::Literal(Value::Null, CDT::uint64_datatype())),
};
let expected = TypedPlan {
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
input: Box::new(
Plan::Get {
id: crate::expr::Id::Global(GlobalId::User(1)),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::int64_datatype(), false),
])),
),
key_val_plan: KeyValPlan {
key_plan: MapFilterProject::new(1)
.project(vec![])
.unwrap()
.into_safe(),
val_plan: MapFilterProject::new(1)
.project(vec![0])
.unwrap()
.into_safe(),
},
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: aggr_exprs.clone(),
simple_aggrs: vec![
AggrWithIndex::new(aggr_exprs[0].clone(), 0, 0),
AggrWithIndex::new(aggr_exprs[1].clone(), 0, 1),
],
distinct_aggrs: vec![],
}),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint32_datatype(), true),
ColumnType::new(ConcreteDataType::int64_datatype(), true),
])),
),
mfp: MapFilterProject::new(2)
.map(vec![
avg_expr,
// TODO(discord9): optimize mfp so to remove indirect ref
ScalarExpr::Column(2),
])
.unwrap()
.project(vec![3])
.unwrap(),
},
};
let bundle = ctx.render_plan(expected).unwrap();
let output = get_output_handle(&mut ctx, bundle);
drop(ctx);
let expected = BTreeMap::from([(1, vec![(Row::new(vec![2u64.into()]), 1, 1)])]);
run_and_check(&mut state, &mut df, 1..2, expected, output);
}
/// SELECT DISTINCT col FROM table
///
/// table schema:
@@ -776,6 +1044,7 @@ mod test {
Box::new(input_plan.with_types(typ)),
key_val_plan,
reduce_plan,
RelationType::empty(),
)
.unwrap();
@@ -850,6 +1119,7 @@ mod test {
Box::new(input_plan.with_types(typ)),
key_val_plan,
reduce_plan,
RelationType::empty(),
)
.unwrap();
@@ -930,6 +1200,7 @@ mod test {
Box::new(input_plan.with_types(typ)),
key_val_plan,
reduce_plan,
RelationType::empty(),
)
.unwrap();
@@ -1006,6 +1277,7 @@ mod test {
Box::new(input_plan.with_types(typ)),
key_val_plan,
reduce_plan,
RelationType::empty(),
)
.unwrap();
@@ -1097,6 +1369,7 @@ mod test {
Box::new(input_plan.with_types(typ)),
key_val_plan,
reduce_plan,
RelationType::empty(),
)
.unwrap();

View File

@@ -36,6 +36,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
&mut self,
mut src_recv: broadcast::Receiver<DiffRow>,
) -> Result<CollectionBundle, Error> {
debug!("Rendering Source");
let (send_port, recv_port) = self.df.make_edge::<_, Toff>("source");
let arrange_handler = self.compute_state.new_arrange(None);
let arrange_handler_inner =
@@ -60,7 +61,6 @@ impl<'referred, 'df> Context<'referred, 'df> {
let prev_avail = arr.into_iter().map(|((k, _), t, d)| (k, t, d));
let mut to_send = Vec::new();
let mut to_arrange = Vec::new();
// TODO(discord9): handling tokio broadcast error
while let Ok((r, t, d)) = src_recv.try_recv() {
if t <= now {
@@ -72,7 +72,7 @@ impl<'referred, 'df> Context<'referred, 'df> {
let all = prev_avail.chain(to_send).collect_vec();
if !all.is_empty() || !to_arrange.is_empty() {
debug!(
"All send: {} rows, not yet send: {} rows",
"Rendered Source All send: {} rows, not yet send: {} rows",
all.len(),
to_arrange.len()
);

View File

@@ -42,6 +42,8 @@ pub struct DataflowState {
/// save all used arrange in this dataflow, since usually there is no delete operation
/// we can just keep track of all used arrange and schedule subgraph when they need to be updated
arrange_used: Vec<ArrangeHandler>,
/// the time (in milliseconds) after which entries in the arrangement expire
expire_after: Option<Timestamp>,
}
impl DataflowState {
@@ -99,6 +101,14 @@ impl DataflowState {
pub fn get_err_collector(&self) -> ErrCollector {
self.err_collector.clone()
}
pub fn set_expire_after(&mut self, after: Option<repr::Duration>) {
self.expire_after = after;
}
pub fn expire_after(&self) -> Option<Timestamp> {
self.expire_after
}
}
#[derive(Debug, Clone)]

View File

@@ -153,6 +153,9 @@ pub struct ErrCollector {
}
impl ErrCollector {
pub fn get_all_blocking(&self) -> Vec<EvalError> {
self.inner.blocking_lock().drain(..).collect_vec()
}
pub async fn get_all(&self) -> Vec<EvalError> {
self.inner.lock().await.drain(..).collect_vec()
}

View File

@@ -17,8 +17,10 @@
use std::collections::HashMap;
use std::sync::OnceLock;
use common_error::ext::BoxedError;
use common_telemetry::debug;
use common_time::DateTime;
use common_time::timestamp::TimeUnit;
use common_time::{DateTime, Timestamp};
use datafusion_expr::Operator;
use datatypes::data_type::ConcreteDataType;
use datatypes::types::cast;
@@ -30,14 +32,14 @@ use snafu::{ensure, OptionExt, ResultExt};
use strum::{EnumIter, IntoEnumIterator};
use substrait::df_logical_plan::consumer::name_to_op;
use crate::adapter::error::{Error, InvalidQuerySnafu, PlanSnafu};
use crate::adapter::error::{Error, ExternalSnafu, InvalidQuerySnafu, PlanSnafu};
use crate::expr::error::{
CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, TryFromValueSnafu,
TypeMismatchSnafu,
CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, OverflowSnafu,
TryFromValueSnafu, TypeMismatchSnafu,
};
use crate::expr::signature::{GenericFn, Signature};
use crate::expr::{InvalidArgumentSnafu, ScalarExpr};
use crate::repr::{value_to_internal_ts, Row};
use crate::expr::{InvalidArgumentSnafu, ScalarExpr, TypedExpr};
use crate::repr::{self, value_to_internal_ts, Row};
/// UnmaterializableFunc is a function that can't be eval independently,
/// and require special handling
@@ -45,6 +47,11 @@ use crate::repr::{value_to_internal_ts, Row};
pub enum UnmaterializableFunc {
Now,
CurrentSchema,
TumbleWindow {
ts: Box<TypedExpr>,
window_size: common_time::Interval,
start_time: Option<DateTime>,
},
}
impl UnmaterializableFunc {
@@ -61,14 +68,51 @@ impl UnmaterializableFunc {
output: ConcreteDataType::string_datatype(),
generic_fn: GenericFn::CurrentSchema,
},
Self::TumbleWindow { .. } => Signature {
input: smallvec![ConcreteDataType::timestamp_millisecond_datatype()],
output: ConcreteDataType::timestamp_millisecond_datatype(),
generic_fn: GenericFn::TumbleWindow,
},
}
}
/// Create a UnmaterializableFunc from a string of the function name
pub fn from_str(name: &str) -> Result<Self, Error> {
match name {
pub fn from_str_args(name: &str, args: Vec<TypedExpr>) -> Result<Self, Error> {
match name.to_lowercase().as_str() {
"now" => Ok(Self::Now),
"current_schema" => Ok(Self::CurrentSchema),
"tumble" => {
let ts = args.first().context(InvalidQuerySnafu {
reason: "Tumble window function requires a timestamp argument",
})?;
let window_size = args
.get(1)
.and_then(|expr| expr.expr.as_literal())
.context(InvalidQuerySnafu {
reason: "Tumble window function requires a window size argument"
})?.as_string() // TODO(discord9): since df to substrait convertor does not support interval type yet, we need to take a string and cast it to interval instead
.map(|s|cast(Value::from(s), &ConcreteDataType::interval_month_day_nano_datatype())).transpose().map_err(BoxedError::new).context(
ExternalSnafu
)?.and_then(|v|v.as_interval())
.with_context(||InvalidQuerySnafu {
reason: format!("Tumble window function requires window size argument to be a string describe a interval, found {:?}", args.get(1))
})?;
let start_time = match args.get(2) {
Some(start_time) => start_time.expr.as_literal(),
None => None,
}
.map(|s| cast(s.clone(), &ConcreteDataType::datetime_datatype())).transpose().map_err(BoxedError::new).context(ExternalSnafu)?.map(|v|v.as_datetime().with_context(
||InvalidQuerySnafu {
reason: format!("Tumble window function requires start time argument to be a datetime describe in string, found {:?}", args.get(2))
}
)).transpose()?;
Ok(Self::TumbleWindow {
ts: Box::new(ts.clone()),
window_size,
start_time,
})
}
_ => InvalidQuerySnafu {
reason: format!("Unknown unmaterializable function: {}", name),
}
@@ -87,6 +131,14 @@ pub enum UnaryFunc {
IsFalse,
StepTimestamp,
Cast(ConcreteDataType),
TumbleWindowFloor {
window_size: common_time::Interval,
start_time: Option<DateTime>,
},
TumbleWindowCeiling {
window_size: common_time::Interval,
start_time: Option<DateTime>,
},
}
impl UnaryFunc {
@@ -118,6 +170,16 @@ impl UnaryFunc {
output: to.clone(),
generic_fn: GenericFn::Cast,
},
Self::TumbleWindowFloor { .. } => Signature {
input: smallvec![ConcreteDataType::timestamp_millisecond_datatype()],
output: ConcreteDataType::timestamp_millisecond_datatype(),
generic_fn: GenericFn::TumbleWindow,
},
Self::TumbleWindowCeiling { .. } => Signature {
input: smallvec![ConcreteDataType::timestamp_millisecond_datatype()],
output: ConcreteDataType::timestamp_millisecond_datatype(),
generic_fn: GenericFn::TumbleWindow,
},
}
}
@@ -211,10 +273,51 @@ impl UnaryFunc {
debug!("Cast to type: {to:?}, result: {:?}", res);
res
}
Self::TumbleWindowFloor {
window_size,
start_time,
} => {
let ts = get_ts_as_millisecond(arg)?;
let start_time = start_time.map(|t| t.val()).unwrap_or(0);
let window_size = (window_size.to_nanosecond() / 1_000_000) as repr::Duration; // nanosecond to millisecond
let window_start = start_time + (ts - start_time) / window_size * window_size;
let ret = Timestamp::new_millisecond(window_start);
Ok(Value::from(ret))
}
Self::TumbleWindowCeiling {
window_size,
start_time,
} => {
let ts = get_ts_as_millisecond(arg)?;
let start_time = start_time.map(|t| t.val()).unwrap_or(0);
let window_size = (window_size.to_nanosecond() / 1_000_000) as repr::Duration; // nanosecond to millisecond
let window_start = start_time + (ts - start_time) / window_size * window_size;
let window_end = window_start + window_size;
let ret = Timestamp::new_millisecond(window_end);
Ok(Value::from(ret))
}
}
}
}
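// Worked example (illustrative, not part of the original change): with `start_time = 0`
// and `window_size = 1_000` ms, a row at ts = 1_625_097_601_500 maps to
//   floor   = 0 + (1_625_097_601_500 / 1_000) * 1_000 = 1_625_097_601_000
//   ceiling = floor + 1_000                            = 1_625_097_602_000
// i.e. the `[window_start, window_end)` pair attached to each aggregated row.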
fn get_ts_as_millisecond(arg: Value) -> Result<repr::Timestamp, EvalError> {
let ts = if let Some(ts) = arg.as_timestamp() {
ts.convert_to(TimeUnit::Millisecond)
.context(OverflowSnafu)?
.value()
} else if let Some(ts) = arg.as_datetime() {
ts.val()
} else {
InvalidArgumentSnafu {
reason: "Expect input to be timestamp or datetime type",
}
.fail()?
};
Ok(ts)
}
/// BinaryFunc is a function that takes two arguments.
/// Also notice this enum doesn't contain function arguments, since the arguments are stored in the expression.
///
@@ -375,6 +478,22 @@ impl BinaryFunc {
)
}
pub fn add(input_type: ConcreteDataType) -> Result<Self, Error> {
Self::specialization(GenericFn::Add, input_type)
}
pub fn sub(input_type: ConcreteDataType) -> Result<Self, Error> {
Self::specialization(GenericFn::Sub, input_type)
}
pub fn mul(input_type: ConcreteDataType) -> Result<Self, Error> {
Self::specialization(GenericFn::Mul, input_type)
}
pub fn div(input_type: ConcreteDataType) -> Result<Self, Error> {
Self::specialization(GenericFn::Div, input_type)
}
/// Get the specialization of the binary function based on the generic function and the input type
pub fn specialization(generic: GenericFn, input_type: ConcreteDataType) -> Result<Self, Error> {
let rule = SPECIALIZATION.get_or_init(|| {

View File

@@ -136,27 +136,44 @@ impl AggregateFunc {
/// Generate signature for each aggregate function
macro_rules! generate_signature {
($value:ident, { $($user_arm:tt)* },
[ $(
$auto_arm:ident=>($con_type:ident,$generic:ident)
),*
]) => {
($value:ident,
{ $($user_arm:tt)* },
[ $(
$auto_arm:ident=>($($arg:ident),*)
),*
]
) => {
match $value {
$($user_arm)*,
$(
Self::$auto_arm => Signature {
input: smallvec![
ConcreteDataType::$con_type(),
ConcreteDataType::$con_type(),
],
output: ConcreteDataType::$con_type(),
generic_fn: GenericFn::$generic,
},
Self::$auto_arm => gen_one_siginature!($($arg),*),
)*
}
};
}
/// Generate one match arm with optional arguments
macro_rules! gen_one_siginature {
(
$con_type:ident, $generic:ident
) => {
Signature {
input: smallvec![ConcreteDataType::$con_type(), ConcreteDataType::$con_type(),],
output: ConcreteDataType::$con_type(),
generic_fn: GenericFn::$generic,
}
};
(
$in_type:ident, $out_type:ident, $generic:ident
) => {
Signature {
input: smallvec![ConcreteDataType::$in_type()],
output: ConcreteDataType::$out_type(),
generic_fn: GenericFn::$generic,
}
};
}
static SPECIALIZATION: OnceLock<HashMap<(GenericFn, ConcreteDataType), AggregateFunc>> =
OnceLock::new();
@@ -223,6 +240,8 @@ impl AggregateFunc {
/// all concrete datatypes with precision types will be returned as the largest possible variant
/// as an exception, count has a signature of `null -> i64`, but it's actually `anytype -> i64`
///
/// TODO(discord9): fix the signature for sum: unsigned -> u64, signed -> i64
pub fn signature(&self) -> Signature {
generate_signature!(self, {
AggregateFunc::Count => Signature {
@@ -263,12 +282,12 @@ impl AggregateFunc {
MinTime => (time_second_datatype, Min),
MinDuration => (duration_second_datatype, Min),
MinInterval => (interval_year_month_datatype, Min),
SumInt16 => (int16_datatype, Sum),
SumInt32 => (int32_datatype, Sum),
SumInt64 => (int64_datatype, Sum),
SumUInt16 => (uint16_datatype, Sum),
SumUInt32 => (uint32_datatype, Sum),
SumUInt64 => (uint64_datatype, Sum),
SumInt16 => (int16_datatype, int64_datatype, Sum),
SumInt32 => (int32_datatype, int64_datatype, Sum),
SumInt64 => (int64_datatype, int64_datatype, Sum),
SumUInt16 => (uint16_datatype, uint64_datatype, Sum),
SumUInt32 => (uint32_datatype, uint64_datatype, Sum),
SumUInt64 => (uint64_datatype, uint64_datatype, Sum),
SumFloat32 => (float32_datatype, Sum),
SumFloat64 => (float64_datatype, Sum),
Any => (boolean_datatype, Any),

View File

@@ -26,10 +26,10 @@ use crate::adapter::error::{
};
use crate::expr::error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
use crate::expr::func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
use crate::repr::ColumnType;
use crate::repr::{ColumnType, RelationType};
/// A scalar expression with a known type.
#[derive(Debug, Clone)]
#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)]
pub struct TypedExpr {
/// The expression.
pub expr: ScalarExpr,
@@ -43,6 +43,73 @@ impl TypedExpr {
}
}
impl TypedExpr {
/// expand multi-value expression to multiple expressions with new indices
pub fn expand_multi_value(
input_typ: &RelationType,
exprs: &[TypedExpr],
) -> Result<Vec<TypedExpr>, Error> {
// old indices in mfp, expanded expr
let mut ret = vec![];
let input_arity = input_typ.column_types.len();
for (old_idx, expr) in exprs.iter().enumerate() {
if let ScalarExpr::CallUnmaterializable(UnmaterializableFunc::TumbleWindow {
ts,
window_size,
start_time,
}) = &expr.expr
{
let floor = UnaryFunc::TumbleWindowFloor {
window_size: *window_size,
start_time: *start_time,
};
let ceil = UnaryFunc::TumbleWindowCeiling {
window_size: *window_size,
start_time: *start_time,
};
let floor = ScalarExpr::CallUnary {
func: floor,
expr: Box::new(ts.expr.clone()),
}
.with_type(ts.typ.clone());
ret.push((None, floor));
let ceil = ScalarExpr::CallUnary {
func: ceil,
expr: Box::new(ts.expr.clone()),
}
.with_type(ts.typ.clone());
ret.push((None, ceil));
} else {
ret.push((Some(input_arity + old_idx), expr.clone()))
}
}
// get the shuffled index (old_idx -> new_idx)
// note the index is offset by input_arity because mfp is designed to include input columns first, then intermediate columns
let shuffle = ret
.iter()
.map(|(old_idx, _)| *old_idx) // [Option<opt_idx>]
.enumerate()
.map(|(new, old)| (old, new + input_arity))
.flat_map(|(old, new)| old.map(|o| (o, new)))
.chain((0..input_arity).map(|i| (i, i))) // also remember to chain the input columns as not changed
.collect::<BTreeMap<_, _>>();
// shuffle expr's index
let exprs = ret
.into_iter()
.map(|(_, mut expr)| {
// invariant: it is expected that no expr will refer to the column being expanded
expr.expr.permute_map(&shuffle)?;
Ok(expr)
})
.collect::<Result<Vec<_>, _>>()?;
Ok(dbg!(exprs))
}
}
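The shuffle built in `expand_multi_value` re-maps each expression's old index to its post-expansion position, offset by the input arity, while input columns keep their positions. A dependency-free sketch of just that index arithmetic (the helper and its inputs are illustrative, not the project's types):

```rust
use std::collections::BTreeMap;

// Illustrative only: `old_indices[i]` is Some(old) when expression i keeps an
// original (already offset) index, and None when it was expanded, e.g. a tumble
// window call turned into separate floor and ceiling expressions.
fn build_shuffle(old_indices: &[Option<usize>], input_arity: usize) -> BTreeMap<usize, usize> {
    old_indices
        .iter()
        .copied()
        .enumerate()
        .filter_map(|(new, old)| old.map(|o| (o, new + input_arity)))
        // input columns are unchanged
        .chain((0..input_arity).map(|i| (i, i)))
        .collect()
}

fn main() {
    // two input columns; the first expr was expanded, the second kept old index 2
    let shuffle = build_shuffle(&[None, Some(2)], 2);
    let expected: Vec<(usize, usize)> = vec![(0, 0), (1, 1), (2, 3)];
    assert_eq!(shuffle.into_iter().collect::<Vec<_>>(), expected);
}
```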
/// A scalar expression, which can be evaluated to a value.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ScalarExpr {
@@ -83,6 +150,10 @@ pub enum ScalarExpr {
}
impl ScalarExpr {
pub fn with_type(self, typ: ColumnType) -> TypedExpr {
TypedExpr::new(self, typ)
}
/// try to determine the type of the expression
pub fn typ(&self, context: &[ColumnType]) -> Result<ColumnType, Error> {
match self {

View File

@@ -64,4 +64,5 @@ pub enum GenericFn {
// unmaterized func
Now,
CurrentSchema,
TumbleWindow,
}

View File

@@ -17,6 +17,7 @@
//! It also contains definition of expression, adapter and plan, and internal state management.
#![feature(let_chains)]
#![feature(duration_abs_diff)]
#![allow(dead_code)]
#![allow(unused_imports)]
#![warn(missing_docs)]
@@ -30,3 +31,5 @@ mod plan;
mod repr;
mod transform;
mod utils;
pub use adapter::{FlownodeBuilder, FlownodeManager, FlownodeManagerRef, FlownodeOptions};

View File

@@ -44,7 +44,7 @@ pub struct TypedPlan {
impl TypedPlan {
/// directly apply a mfp to the plan
pub fn mfp(self, mfp: MapFilterProject) -> Result<Self, Error> {
let new_type = self.typ.apply_mfp(&mfp, &[])?;
let new_type = self.typ.apply_mfp(&mfp)?;
let plan = match self.plan {
Plan::Mfp {
input,
@@ -68,14 +68,14 @@ impl TypedPlan {
pub fn projection(self, exprs: Vec<TypedExpr>) -> Result<Self, Error> {
let input_arity = self.typ.column_types.len();
let output_arity = exprs.len();
let (exprs, expr_typs): (Vec<_>, Vec<_>) = exprs
let (exprs, _expr_typs): (Vec<_>, Vec<_>) = exprs
.into_iter()
.map(|TypedExpr { expr, typ }| (expr, typ))
.unzip();
let mfp = MapFilterProject::new(input_arity)
.map(exprs)?
.project(input_arity..input_arity + output_arity)?;
let out_typ = self.typ.apply_mfp(&mfp, &expr_typs)?;
let out_typ = self.typ.apply_mfp(&mfp)?;
// special case for mfp to compose when the plan is already mfp
let plan = match self.plan {
Plan::Mfp {

View File

@@ -111,13 +111,13 @@ impl RelationType {
/// then new key=`[1]`, new time index=`[0]`
///
/// note that this function will remove empty keys like key=`[]` will be removed
pub fn apply_mfp(&self, mfp: &MapFilterProject, expr_typs: &[ColumnType]) -> Result<Self> {
let all_types = self
.column_types
.iter()
.chain(expr_typs.iter())
.cloned()
.collect_vec();
pub fn apply_mfp(&self, mfp: &MapFilterProject) -> Result<Self> {
let mut all_types = self.column_types.clone();
for expr in &mfp.expressions {
let expr_typ = expr.typ(&self.column_types)?;
all_types.push(expr_typ);
}
let all_types = all_types;
let mfp_out_types = mfp
.projection
.iter()
@@ -131,6 +131,7 @@ impl RelationType {
})
})
.try_collect()?;
let old_to_new_col = BTreeMap::from_iter(
mfp.projection
.clone()
@@ -205,6 +206,15 @@ impl RelationType {
self
}
/// Set the time index; the time index column is also removed from keys if present
pub fn with_time_index(mut self, time_index: Option<usize>) -> Self {
self.time_index = time_index;
for key in &mut self.keys {
key.remove_col(time_index.unwrap_or(usize::MAX));
}
self
}
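Since `with_time_index` also strips the chosen column from every key, here is a minimal illustration of that behaviour with plain vectors (not the project's `RelationType`):

```rust
// Illustrative only: once a column becomes the time index it is removed from
// every key set, mirroring the remove_col calls in with_time_index above.
fn strip_time_index(mut keys: Vec<Vec<usize>>, time_index: Option<usize>) -> Vec<Vec<usize>> {
    if let Some(ti) = time_index {
        for key in &mut keys {
            key.retain(|&col| col != ti);
        }
    }
    keys
}

fn main() {
    let keys = vec![vec![0, 1], vec![1]];
    let expected: Vec<Vec<usize>> = vec![vec![0], vec![]];
    assert_eq!(strip_time_index(keys, Some(1)), expected);
}
```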
/// Computes the number of columns in the relation.
pub fn arity(&self) -> usize {
self.column_types.len()

View File

@@ -130,12 +130,60 @@ pub async fn sql_to_flow_plan(
Ok(flow_plan)
}
/// register flow-specific functions to the query engine
pub fn register_function_to_query_engine(engine: &Arc<dyn QueryEngine>) {
engine.register_function(Arc::new(TumbleFunction {}));
}
#[derive(Debug)]
pub struct TumbleFunction {}
const TUMBLE_NAME: &str = "tumble";
impl std::fmt::Display for TumbleFunction {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", TUMBLE_NAME.to_ascii_uppercase())
}
}
impl common_function::function::Function for TumbleFunction {
fn name(&self) -> &str {
TUMBLE_NAME
}
fn return_type(&self, _input_types: &[CDT]) -> common_query::error::Result<CDT> {
Ok(CDT::datetime_datatype())
}
fn signature(&self) -> common_query::prelude::Signature {
common_query::prelude::Signature::variadic_any(common_query::prelude::Volatility::Immutable)
}
fn eval(
&self,
_func_ctx: common_function::function::FunctionContext,
_columns: &[datatypes::prelude::VectorRef],
) -> common_query::error::Result<datatypes::prelude::VectorRef> {
UnexpectedSnafu {
reason: "Tumbler function is not implemented for datafusion executor",
}
.fail()
.map_err(BoxedError::new)
.context(common_query::error::ExecuteSnafu)
}
}
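`eval` deliberately fails here: `tumble` is registered only so the query engine can parse it, while the flow engine evaluates the window boundaries itself via `TumbleWindowFloor`/`TumbleWindowCeiling`. As a hedged illustration of that alignment (plain arithmetic, not the project's implementation):

```rust
// Illustrative arithmetic only: align a millisecond timestamp down to the start
// of its tumbling window, given the window size and an origin.
fn tumble_floor(ts_ms: i64, window_ms: i64, origin_ms: i64) -> i64 {
    origin_ms + (ts_ms - origin_ms).div_euclid(window_ms) * window_ms
}

fn main() {
    let hour = 3_600_000;
    let origin = 1_625_097_600_000; // 2021-07-01 00:00:00 UTC, as used in the tumble test later in this diff
    let ts = origin + 90 * 60 * 1_000; // 01:30
    let window_start = tumble_floor(ts, hour, origin);
    assert_eq!(window_start, origin + hour); // floor aligns to 01:00
    assert_eq!(window_start + hour, origin + 2 * hour); // ceiling is the window end, 02:00
}
```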
#[cfg(test)]
mod test {
use std::sync::Arc;
use catalog::RegisterTableRequest;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_ID};
use common_time::{Date, DateTime};
use datatypes::prelude::*;
use datatypes::schema::Schema;
use datatypes::vectors::VectorRef;
use itertools::Itertools;
use prost::Message;
use query::parser::QueryLanguageParser;
use query::plan::LogicalPlan;
@@ -144,23 +192,45 @@ mod test {
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use substrait_proto::proto;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::test_util::MemTable;
use super::*;
use crate::adapter::node_context::IdToNameMap;
use crate::repr::ColumnType;
pub fn create_test_ctx() -> FlownodeContext {
let gid = GlobalId::User(0);
let name = [
"greptime".to_string(),
"public".to_string(),
"numbers".to_string(),
];
let schema = RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), false)]);
let mut schemas = HashMap::new();
let mut tri_map = IdToNameMap::new();
tri_map.insert(Some(name.clone()), Some(0), gid);
{
let gid = GlobalId::User(0);
let name = [
"greptime".to_string(),
"public".to_string(),
"numbers".to_string(),
];
let schema = RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), false)]);
tri_map.insert(Some(name.clone()), Some(1024), gid);
schemas.insert(gid, schema);
}
{
let gid = GlobalId::User(1);
let name = [
"greptime".to_string(),
"public".to_string(),
"numbers_with_ts".to_string(),
];
let schema = RelationType::new(vec![
ColumnType::new(CDT::uint32_datatype(), false),
ColumnType::new(CDT::datetime_datatype(), false),
]);
schemas.insert(gid, schema);
tri_map.insert(Some(name.clone()), Some(1025), gid);
}
FlownodeContext {
schema: HashMap::from([(gid, schema)]),
schema: schemas,
table_repr: tri_map,
query_context: Some(Arc::new(QueryContext::with("greptime", "public"))),
..Default::default()
@@ -177,9 +247,37 @@ mod test {
table: NumbersTable::table(NUMBERS_TABLE_ID),
};
catalog_list.register_table_sync(req).unwrap();
let schema = vec![
datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
datatypes::schema::ColumnSchema::new("ts", CDT::datetime_datatype(), false),
];
let mut columns = vec![];
let numbers = (1..=10).collect_vec();
let column: VectorRef = Arc::new(<u32 as Scalar>::VectorType::from_vec(numbers));
columns.push(column);
let ts = (1..=10).collect_vec();
let column: VectorRef = Arc::new(<DateTime as Scalar>::VectorType::from_vec(ts));
columns.push(column);
let schema = Arc::new(Schema::new(schema));
let recordbatch = common_recordbatch::RecordBatch::new(schema, columns).unwrap();
let table = MemTable::table("numbers_with_ts", recordbatch);
let req_with_ts = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "numbers_with_ts".to_string(),
table_id: 1024,
table,
};
catalog_list.register_table_sync(req_with_ts).unwrap();
let factory = query::QueryEngineFactory::new(catalog_list, None, None, None, false);
let engine = factory.query_engine();
engine.register_function(Arc::new(TumbleFunction {}));
assert_eq!("datafusion", engine.name());
engine

View File

@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use common_decimal::Decimal128;
use common_time::{Date, Timestamp};
use datatypes::arrow::compute::kernels::window;
use datatypes::arrow::ipc::Binary;
use datatypes::data_type::ConcreteDataType as CDT;
use datatypes::data_type::{ConcreteDataType as CDT, DataType};
use datatypes::value::Value;
use hydroflow::futures::future::Map;
use itertools::Itertools;
@@ -83,14 +83,18 @@ impl TypedExpr {
}
impl AggregateExpr {
/// Convert list of `Measure` into Flow's AggregateExpr
///
/// Return both the AggregateExprs and a MapFilterProject that produces the final output of the aggregate functions
fn from_substrait_agg_measures(
ctx: &mut FlownodeContext,
measures: &[Measure],
typ: &RelationType,
extensions: &FunctionExtensions,
) -> Result<Vec<AggregateExpr>, Error> {
) -> Result<(Vec<AggregateExpr>, MapFilterProject), Error> {
let _ = ctx;
let mut aggr_exprs = vec![];
let mut all_aggr_exprs = vec![];
let mut post_maps = vec![];
for m in measures {
let filter = &m
@@ -99,7 +103,7 @@ impl AggregateExpr {
.map(|fil| TypedExpr::from_substrait_rex(fil, typ, extensions))
.transpose()?;
let agg_func = match &m.measure {
let (aggr_expr, post_mfp) = match &m.measure {
Some(f) => {
let distinct = match f.invocation {
_ if f.invocation == AggregationInvocation::Distinct as i32 => true,
@@ -113,12 +117,30 @@ impl AggregateExpr {
}
None => not_impl_err!("Aggregate without aggregate function is not supported"),
}?;
aggr_exprs.push(agg_func);
// permute column indices to refer to the output of post_mfp,
// so that a combined mfp can be constructed at the end
let mut post_map = post_mfp.unwrap_or(ScalarExpr::Column(0));
let cur_arity = all_aggr_exprs.len();
let remap = (0..aggr_expr.len()).map(|i| i + cur_arity).collect_vec();
post_map.permute(&remap)?;
all_aggr_exprs.extend(aggr_expr);
post_maps.push(post_map);
}
Ok(aggr_exprs)
let input_arity = all_aggr_exprs.len();
let aggr_arity = post_maps.len();
let post_mfp_final = MapFilterProject::new(all_aggr_exprs.len())
.map(post_maps)?
.project(input_arity..input_arity + aggr_arity)?;
Ok((all_aggr_exprs, post_mfp_final))
}
/// Convert AggregateFunction into Flow's AggregateExpr
///
/// The returned value is a tuple of AggregateExprs and an optional ScalarExpr that, if present, produces the final output of the aggregate function,
/// since aggregate functions like `avg` need to be transformed into `sum(x)/cast(count(x) as x_type)`
pub fn from_substrait_agg_func(
f: &proto::AggregateFunction,
input_schema: &RelationType,
@@ -126,7 +148,7 @@ impl AggregateExpr {
filter: &Option<TypedExpr>,
order_by: &Option<Vec<TypedExpr>>,
distinct: bool,
) -> Result<AggregateExpr, Error> {
) -> Result<(Vec<AggregateExpr>, Option<ScalarExpr>), Error> {
// TODO(discord9): impl filter
let _ = filter;
let _ = order_by;
@@ -141,26 +163,74 @@ impl AggregateExpr {
args.push(arg_expr);
}
if args.len() != 1 {
return not_impl_err!("Aggregated function with multiple arguments is not supported");
}
let arg = if let Some(first) = args.first() {
first
} else {
return not_impl_err!("Aggregated function without arguments is not supported");
};
let func = match extensions.get(&f.function_reference) {
let fn_name = extensions
.get(&f.function_reference)
.cloned()
.map(|s| s.to_lowercase());
match fn_name.as_ref().map(|s| s.as_ref()) {
Some(Self::AVG_NAME) => AggregateExpr::from_avg_aggr_func(arg),
Some(function_name) => {
AggregateFunc::from_str_and_type(function_name, Some(arg.typ.scalar_type.clone()))
let func = AggregateFunc::from_str_and_type(
function_name,
Some(arg.typ.scalar_type.clone()),
)?;
let exprs = vec![AggregateExpr {
func,
expr: arg.expr.clone(),
distinct,
}];
let ret_mfp = None;
Ok((exprs, ret_mfp))
}
None => not_impl_err!(
"Aggregated function not found: function anchor = {:?}",
f.function_reference
),
}?;
Ok(AggregateExpr {
func,
}
}
const AVG_NAME: &'static str = "avg";
/// convert `avg` function into `sum(x)/cast(count(x) as x_type)`
fn from_avg_aggr_func(
arg: &TypedExpr,
) -> Result<(Vec<AggregateExpr>, Option<ScalarExpr>), Error> {
let arg_type = arg.typ.scalar_type.clone();
let sum = AggregateExpr {
func: AggregateFunc::from_str_and_type("sum", Some(arg_type.clone()))?,
expr: arg.expr.clone(),
distinct,
})
distinct: false,
};
let sum_out_type = sum.func.signature().output.clone();
let count = AggregateExpr {
func: AggregateFunc::Count,
expr: arg.expr.clone(),
distinct: false,
};
let count_out_type = count.func.signature().output.clone();
let avg_output = ScalarExpr::Column(0).call_binary(
ScalarExpr::Column(1).call_unary(UnaryFunc::Cast(sum_out_type.clone())),
BinaryFunc::div(sum_out_type.clone())?,
);
// make sure we don't divide by zero
let zero = ScalarExpr::literal(count_out_type.default_value(), count_out_type.clone());
let non_zero = ScalarExpr::If {
cond: Box::new(ScalarExpr::Column(1).call_binary(zero.clone(), BinaryFunc::NotEq)),
then: Box::new(avg_output),
els: Box::new(ScalarExpr::literal(Value::Null, sum_out_type.clone())),
};
let ret_aggr_exprs = vec![sum, count];
let ret_mfp = Some(non_zero);
Ok((ret_aggr_exprs, ret_mfp))
}
}
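A dependency-free sketch of the `avg` rewrite described above: the planner emits `sum` and `count` accumulators and divides at projection time, guarding against a zero count (the helper is illustrative, not the project's code):

```rust
// Illustrative only: avg(x) is computed from the sum and count accumulators,
// returning None (NULL in the plan) when count is zero, mirroring the
// ScalarExpr::If guard built in from_avg_aggr_func above.
fn avg_from_sum_count(sum: u64, count: i64) -> Option<u64> {
    if count != 0 {
        // the plan casts count to the sum's type before dividing
        Some(sum / count as u64)
    } else {
        None
    }
}

fn main() {
    assert_eq!(avg_from_sum_count(10, 4), Some(2)); // integer division, as with DivUInt64
    assert_eq!(avg_from_sum_count(0, 0), None);
}
```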
@@ -217,6 +287,10 @@ impl KeyValPlan {
impl TypedPlan {
/// Convert AggregateRel into Flow's TypedPlan
///
/// The output of aggr plan is:
///
/// <group_exprs>..<aggr_exprs>
pub fn from_substrait_agg_rel(
ctx: &mut FlownodeContext,
agg: &proto::AggregateRel,
@@ -228,10 +302,28 @@ impl TypedPlan {
return not_impl_err!("Aggregate without an input is not supported");
};
let group_exprs =
TypedExpr::from_substrait_agg_grouping(ctx, &agg.groupings, &input.typ, extensions)?;
let group_exprs = {
let group_exprs = TypedExpr::from_substrait_agg_grouping(
ctx,
&agg.groupings,
&input.typ,
extensions,
)?;
let mut aggr_exprs =
TypedExpr::expand_multi_value(&input.typ, &group_exprs)?
};
let time_index = group_exprs.iter().position(|expr| {
matches!(
&expr.expr,
ScalarExpr::CallUnary {
func: UnaryFunc::TumbleWindowFloor { .. },
expr: _
}
)
});
let (mut aggr_exprs, post_mfp) =
AggregateExpr::from_substrait_agg_measures(ctx, &agg.measures, &input.typ, extensions)?;
let key_val_plan = KeyValPlan::from_substrait_gen_key_val_plan(
@@ -240,6 +332,7 @@ impl TypedPlan {
input.typ.column_types.len(),
)?;
// output type is group_exprs + aggr_exprs
let output_type = {
let mut output_types = Vec::new();
// first append group_expr as key, then aggr_expr as value
@@ -253,8 +346,13 @@ impl TypedPlan {
));
}
// TODO(discord9): try best to get time
RelationType::new(output_types).with_key((0..group_exprs.len()).collect_vec())
};
if group_exprs.is_empty() {
RelationType::new(output_types)
} else {
RelationType::new(output_types).with_key((0..group_exprs.len()).collect_vec())
}
}
.with_time_index(time_index);
// copy aggr_exprs to full_aggrs, and split them into simple_aggrs and distinct_aggrs
// also set their input/output column indices
@@ -289,15 +387,46 @@ impl TypedPlan {
key_val_plan,
reduce_plan: ReducePlan::Accumulable(accum_plan),
};
Ok(TypedPlan {
typ: output_type,
plan,
})
// FIX(discord9): deal with key first
if post_mfp.is_identity() {
Ok(TypedPlan {
typ: output_type,
plan,
})
} else {
// make post_mfp keep an identity mapping for the key columns
let input = TypedPlan {
typ: output_type.clone(),
plan,
};
let key_arity = group_exprs.len();
let mut post_mfp = post_mfp;
let val_arity = post_mfp.input_arity;
// offset post_mfp's col ref by `key_arity`
let shuffle = BTreeMap::from_iter((0..val_arity).map(|v| (v, v + key_arity)));
let new_arity = key_arity + val_arity;
post_mfp.permute(shuffle, new_arity)?;
// add key projection to post mfp
let (m, f, p) = post_mfp.into_map_filter_project();
let p = (0..key_arity).chain(p).collect_vec();
let post_mfp = MapFilterProject::new(new_arity)
.map(m)?
.filter(f)?
.project(p)?;
Ok(TypedPlan {
typ: output_type.apply_mfp(&post_mfp)?,
plan: Plan::Mfp {
input: Box::new(input),
mfp: post_mfp,
},
})
}
}
}
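The non-identity branch above boils down to index arithmetic: value column references in the post-aggregation MFP are shifted right by the number of key columns, and the keys are re-projected in front unchanged. A hedged, dependency-free sketch of the resulting projection:

```rust
// Illustrative only: prepend an identity projection of the keys and shift the
// value column references right by key_arity, as the post_mfp rewrite above does.
fn offset_post_projection(key_arity: usize, val_projection: &[usize]) -> Vec<usize> {
    (0..key_arity)
        .chain(val_projection.iter().map(|&v| v + key_arity))
        .collect()
}

fn main() {
    // two key columns; the post-mfp originally projected value columns [0, 1]
    assert_eq!(offset_post_projection(2, &[0, 1]), vec![0usize, 1, 2, 3]);
}
```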
#[cfg(test)]
mod test {
use common_time::{DateTime, Interval};
use datatypes::prelude::ConcreteDataType;
use pretty_assertions::{assert_eq, assert_ne};
@@ -306,6 +435,283 @@ mod test {
use crate::repr::{self, ColumnType, RelationType};
use crate::transform::test::{create_test_ctx, create_test_query_engine, sql_to_substrait};
#[tokio::test]
async fn test_tumble_parse() {
let engine = create_test_query_engine();
let sql = "SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 hour', '2021-07-01 00:00:00')";
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan).unwrap();
let aggr_expr = AggregateExpr {
func: AggregateFunc::SumUInt32,
expr: ScalarExpr::Column(0),
distinct: false,
};
let expected = TypedPlan {
typ: RelationType::new(vec![
ColumnType::new(CDT::uint64_datatype(), true), // sum(number)
ColumnType::new(CDT::datetime_datatype(), false), // window start
ColumnType::new(CDT::datetime_datatype(), false), // window end
]),
// TODO(discord9): mfp indirectly refers to key columns
/*
.with_key(vec![1])
.with_time_index(Some(0)),*/
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
input: Box::new(
Plan::Get {
id: crate::expr::Id::Global(GlobalId::User(1)),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
ColumnType::new(ConcreteDataType::datetime_datatype(), false),
])),
),
key_val_plan: KeyValPlan {
key_plan: MapFilterProject::new(2)
.map(vec![
ScalarExpr::Column(1).call_unary(
UnaryFunc::TumbleWindowFloor {
window_size: Interval::from_month_day_nano(
0,
0,
3_600_000_000_000,
),
start_time: Some(DateTime::new(1625097600000)),
},
),
ScalarExpr::Column(1).call_unary(
UnaryFunc::TumbleWindowCeiling {
window_size: Interval::from_month_day_nano(
0,
0,
3_600_000_000_000,
),
start_time: Some(DateTime::new(1625097600000)),
},
),
])
.unwrap()
.project(vec![2, 3])
.unwrap()
.into_safe(),
val_plan: MapFilterProject::new(2)
.project(vec![0, 1])
.unwrap()
.into_safe(),
},
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: vec![aggr_expr.clone()],
simple_aggrs: vec![AggrWithIndex::new(aggr_expr.clone(), 0, 0)],
distinct_aggrs: vec![],
}),
}
.with_types(
RelationType::new(vec![
ColumnType::new(CDT::datetime_datatype(), false), // window start
ColumnType::new(CDT::datetime_datatype(), false), // window end
ColumnType::new(CDT::uint64_datatype(), true), //sum(number)
])
.with_key(vec![1])
.with_time_index(Some(0)),
),
),
mfp: MapFilterProject::new(3)
.map(vec![
ScalarExpr::Column(2),
ScalarExpr::Column(3),
ScalarExpr::Column(0),
ScalarExpr::Column(1),
])
.unwrap()
.project(vec![4, 5, 6])
.unwrap(),
},
};
assert_eq!(flow_plan, expected);
}
#[tokio::test]
async fn test_avg_group_by() {
let engine = create_test_query_engine();
let sql = "SELECT avg(number), number FROM numbers GROUP BY number";
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan);
let aggr_exprs = vec![
AggregateExpr {
func: AggregateFunc::SumUInt32,
expr: ScalarExpr::Column(0),
distinct: false,
},
AggregateExpr {
func: AggregateFunc::Count,
expr: ScalarExpr::Column(0),
distinct: false,
},
];
let avg_expr = ScalarExpr::If {
cond: Box::new(ScalarExpr::Column(2).call_binary(
ScalarExpr::Literal(Value::from(0i64), CDT::int64_datatype()),
BinaryFunc::NotEq,
)),
then: Box::new(ScalarExpr::Column(1).call_binary(
ScalarExpr::Column(2).call_unary(UnaryFunc::Cast(CDT::uint64_datatype())),
BinaryFunc::DivUInt64,
)),
els: Box::new(ScalarExpr::Literal(Value::Null, CDT::uint64_datatype())),
};
let expected = TypedPlan {
typ: RelationType::new(vec![
ColumnType::new(CDT::uint64_datatype(), true), // sum(number) -> u64
ColumnType::new(CDT::uint32_datatype(), false), // number
]),
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
input: Box::new(
Plan::Get {
id: crate::expr::Id::Global(GlobalId::User(0)),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
])),
),
key_val_plan: KeyValPlan {
key_plan: MapFilterProject::new(1)
.map(vec![ScalarExpr::Column(0)])
.unwrap()
.project(vec![1])
.unwrap()
.into_safe(),
val_plan: MapFilterProject::new(1)
.project(vec![0])
.unwrap()
.into_safe(),
},
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: aggr_exprs.clone(),
simple_aggrs: vec![
AggrWithIndex::new(aggr_exprs[0].clone(), 0, 0),
AggrWithIndex::new(aggr_exprs[1].clone(), 0, 1),
],
distinct_aggrs: vec![],
}),
}
.with_types(
RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint32_datatype(), false), // key: number
ColumnType::new(ConcreteDataType::uint64_datatype(), true), // sum
ColumnType::new(ConcreteDataType::int64_datatype(), true), // count
])
.with_key(vec![0]),
),
),
mfp: MapFilterProject::new(3)
.map(vec![
avg_expr, // col 3
// TODO(discord9): optimize mfp so as to remove the indirect ref
ScalarExpr::Column(3), // col 4
ScalarExpr::Column(0), // col 5
])
.unwrap()
.project(vec![4, 5])
.unwrap(),
},
};
assert_eq!(flow_plan.unwrap(), expected);
}
#[tokio::test]
async fn test_avg() {
let engine = create_test_query_engine();
let sql = "SELECT avg(number) FROM numbers";
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan).unwrap();
let aggr_exprs = vec![
AggregateExpr {
func: AggregateFunc::SumUInt32,
expr: ScalarExpr::Column(0),
distinct: false,
},
AggregateExpr {
func: AggregateFunc::Count,
expr: ScalarExpr::Column(0),
distinct: false,
},
];
let avg_expr = ScalarExpr::If {
cond: Box::new(ScalarExpr::Column(1).call_binary(
ScalarExpr::Literal(Value::from(0i64), CDT::int64_datatype()),
BinaryFunc::NotEq,
)),
then: Box::new(ScalarExpr::Column(0).call_binary(
ScalarExpr::Column(1).call_unary(UnaryFunc::Cast(CDT::uint64_datatype())),
BinaryFunc::DivUInt64,
)),
els: Box::new(ScalarExpr::Literal(Value::Null, CDT::uint64_datatype())),
};
let expected = TypedPlan {
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
input: Box::new(
Plan::Get {
id: crate::expr::Id::Global(GlobalId::User(0)),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint32_datatype(), false),
])),
),
key_val_plan: KeyValPlan {
key_plan: MapFilterProject::new(1)
.project(vec![])
.unwrap()
.into_safe(),
val_plan: MapFilterProject::new(1)
.project(vec![0])
.unwrap()
.into_safe(),
},
reduce_plan: ReducePlan::Accumulable(AccumulablePlan {
full_aggrs: aggr_exprs.clone(),
simple_aggrs: vec![
AggrWithIndex::new(aggr_exprs[0].clone(), 0, 0),
AggrWithIndex::new(aggr_exprs[1].clone(), 0, 1),
],
distinct_aggrs: vec![],
}),
}
.with_types(RelationType::new(vec![
ColumnType::new(ConcreteDataType::uint64_datatype(), true),
ColumnType::new(ConcreteDataType::int64_datatype(), true),
])),
),
mfp: MapFilterProject::new(2)
.map(vec![
avg_expr,
// TODO(discord9): optimize mfp so as to remove the indirect ref
ScalarExpr::Column(2),
])
.unwrap()
.project(vec![3])
.unwrap(),
},
};
assert_eq!(flow_plan, expected);
}
#[tokio::test]
async fn test_sum() {
let engine = create_test_query_engine();
@@ -315,7 +721,7 @@ mod test {
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan);
let typ = RelationType::new(vec![ColumnType::new(
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
true,
)]);
let aggr_expr = AggregateExpr {
@@ -324,7 +730,7 @@ mod test {
distinct: false,
};
let expected = TypedPlan {
typ: RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), true)]),
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
@@ -355,9 +761,9 @@ mod test {
.with_types(typ),
),
mfp: MapFilterProject::new(1)
.map(vec![ScalarExpr::Column(0)])
.map(vec![ScalarExpr::Column(0), ScalarExpr::Column(1)])
.unwrap()
.project(vec![1])
.project(vec![2])
.unwrap(),
},
};
@@ -380,7 +786,7 @@ mod test {
};
let expected = TypedPlan {
typ: RelationType::new(vec![
ColumnType::new(CDT::uint32_datatype(), true), // col sum(number)
ColumnType::new(CDT::uint64_datatype(), true), // col sum(number)
ColumnType::new(CDT::uint32_datatype(), false), // col number
]),
plan: Plan::Mfp {
@@ -415,15 +821,19 @@ mod test {
.with_types(
RelationType::new(vec![
ColumnType::new(CDT::uint32_datatype(), false), // col number
ColumnType::new(CDT::uint32_datatype(), true), // col sum(number)
ColumnType::new(CDT::uint64_datatype(), true), // col sum(number)
])
.with_key(vec![0]),
),
),
mfp: MapFilterProject::new(2)
.map(vec![ScalarExpr::Column(1), ScalarExpr::Column(0)])
.map(vec![
ScalarExpr::Column(1),
ScalarExpr::Column(2),
ScalarExpr::Column(0),
])
.unwrap()
.project(vec![2, 3])
.project(vec![3, 4])
.unwrap(),
},
};
@@ -446,7 +856,7 @@ mod test {
distinct: false,
};
let expected = TypedPlan {
typ: RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), true)]),
typ: RelationType::new(vec![ColumnType::new(CDT::uint64_datatype(), true)]),
plan: Plan::Mfp {
input: Box::new(
Plan::Reduce {
@@ -478,14 +888,14 @@ mod test {
}),
}
.with_types(RelationType::new(vec![ColumnType::new(
CDT::uint32_datatype(),
CDT::uint64_datatype(),
true,
)])),
),
mfp: MapFilterProject::new(1)
.map(vec![ScalarExpr::Column(0)])
.map(vec![ScalarExpr::Column(0), ScalarExpr::Column(1)])
.unwrap()
.project(vec![1])
.project(vec![2])
.unwrap(),
},
};

View File

@@ -71,7 +71,7 @@ impl TypedExpr {
),
})?;
let arg_len = f.arguments.len();
let arg_exprs: Vec<TypedExpr> = f
let arg_typed_exprs: Vec<TypedExpr> = f
.arguments
.iter()
.map(|arg| match &arg.arg_type {
@@ -83,7 +83,8 @@ impl TypedExpr {
.try_collect()?;
// literal's type is determined by the function and type of other args
let (arg_exprs, arg_types): (Vec<_>, Vec<_>) = arg_exprs
let (arg_exprs, arg_types): (Vec<_>, Vec<_>) = arg_typed_exprs
.clone()
.into_iter()
.map(
|TypedExpr {
@@ -174,7 +175,9 @@ impl TypedExpr {
};
expr.optimize();
Ok(TypedExpr::new(expr, ret_type))
} else if let Ok(func) = UnmaterializableFunc::from_str(fn_name) {
} else if let Ok(func) =
UnmaterializableFunc::from_str_args(fn_name, arg_typed_exprs)
{
let ret_type = ColumnType::new_nullable(func.signature().output.clone());
Ok(TypedExpr::new(
ScalarExpr::CallUnmaterializable(func),

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use itertools::Itertools;
use snafu::OptionExt;
use substrait_proto::proto::expression::MaskExpression;
@@ -22,8 +24,8 @@ use substrait_proto::proto::{plan_rel, Plan as SubPlan, Rel};
use crate::adapter::error::{
Error, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu, UnexpectedSnafu,
};
use crate::expr::{MapFilterProject, TypedExpr};
use crate::plan::{Plan, TypedPlan};
use crate::expr::{MapFilterProject, ScalarExpr, TypedExpr, UnaryFunc};
use crate::plan::{KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::repr::{self, RelationType};
use crate::transform::{substrait_proto, FlownodeContext, FunctionExtensions};
@@ -75,6 +77,7 @@ impl TypedPlan {
} else {
return not_impl_err!("Projection without an input is not supported");
};
let mut exprs: Vec<TypedExpr> = vec![];
for e in &p.expressions {
let expr = TypedExpr::from_substrait_rex(e, &input.typ, extensions)?;
@@ -97,6 +100,127 @@ impl TypedPlan {
};
Ok(TypedPlan { typ, plan })
} else {
/// If the reduce plan contains special functions like tumble window floor/ceiling, add them to the proj_exprs
fn rewrite_projection_after_reduce(
key_val_plan: KeyValPlan,
_reduce_plan: ReducePlan,
reduce_output_type: &RelationType,
proj_exprs: &mut Vec<TypedExpr>,
) -> Result<(), Error> {
// TODO: get keys correctly
let key_exprs = key_val_plan
.key_plan
.projection
.clone()
.into_iter()
.map(|i| {
if i < key_val_plan.key_plan.input_arity {
ScalarExpr::Column(i)
} else {
key_val_plan.key_plan.expressions
[i - key_val_plan.key_plan.input_arity]
.clone()
}
})
.collect_vec();
let mut shift_offset = 0;
let special_keys = key_exprs
.into_iter()
.enumerate()
.filter(|(_idx, p)| {
if matches!(
p,
ScalarExpr::CallUnary {
func: UnaryFunc::TumbleWindowFloor { .. },
..
} | ScalarExpr::CallUnary {
func: UnaryFunc::TumbleWindowCeiling { .. },
..
}
) {
if matches!(
p,
ScalarExpr::CallUnary {
func: UnaryFunc::TumbleWindowFloor { .. },
..
}
) {
shift_offset += 1;
}
true
} else {
false
}
})
.collect_vec();
let spec_key_arity = special_keys.len();
if spec_key_arity == 0 {
return Ok(());
}
{
// shift proj_exprs to the right by spec_key_arity
let max_used_col_in_proj = proj_exprs
.iter()
.map(|expr| {
expr.expr
.get_all_ref_columns()
.into_iter()
.max()
.unwrap_or_default()
})
.max()
.unwrap_or_default();
let shuffle = (0..=max_used_col_in_proj)
.map(|col| (col, col + shift_offset))
.collect::<BTreeMap<_, _>>();
for proj_expr in proj_exprs.iter_mut() {
proj_expr.expr.permute_map(&shuffle)?;
}
// add the keys to the end
for (key_idx, _key_expr) in special_keys {
// here we assume the output of the reduce operator is the key columns first, followed by the value columns
proj_exprs.push(
ScalarExpr::Column(key_idx).with_type(
reduce_output_type.column_types[key_idx].clone(),
),
);
}
}
Ok(())
}
match input.plan.clone() {
Plan::Reduce {
key_val_plan,
reduce_plan,
..
} => {
rewrite_projection_after_reduce(
key_val_plan,
reduce_plan,
&input.typ,
&mut exprs,
)?;
}
Plan::Mfp { input, mfp: _ } => {
if let Plan::Reduce {
key_val_plan,
reduce_plan,
..
} = input.plan
{
rewrite_projection_after_reduce(
key_val_plan,
reduce_plan,
&input.typ,
&mut exprs,
)?;
}
}
_ => (),
}
input.projection(exprs)
}
}

View File

@@ -31,6 +31,7 @@ use crate::repr::{value_to_internal_ts, Diff, DiffRow, Duration, KeyValDiffRow,
pub type Batch = BTreeMap<Row, SmallVec<[DiffRow; 2]>>;
/// A spine of batches, arranged by timestamp
/// TODO(discord9): consider internally indexing by key, value, and timestamp for faster lookups
pub type Spine = BTreeMap<Timestamp, Batch>;
/// Determine when a key should expire according to its event timestamp in the key.
@@ -51,6 +52,17 @@ pub struct KeyExpiryManager {
}
impl KeyExpiryManager {
pub fn new(
key_expiration_duration: Option<Duration>,
event_timestamp_from_row: Option<ScalarExpr>,
) -> Self {
Self {
event_ts_to_key: Default::default(),
key_expiration_duration,
event_timestamp_from_row,
}
}
/// Extract event timestamp from key row.
///
/// If no expire state is set, return None.
@@ -177,6 +189,10 @@ impl Arrangement {
}
}
pub fn set_expire_state(&mut self, expire_state: KeyExpiryManager) {
self.expire_state = Some(expire_state);
}
/// Apply updates into the spine, regardless of whether the updates are in the future, the past, or now.
///
/// Return the maximum expire time (i.e. by how much time it has already expired) over all updates, if any key has already expired.
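A hedged sketch of the expiry rule this manager appears to encode, assuming a key counts as expired once its event timestamp lags `now` by more than the configured duration; the names and exact semantics here are assumptions, not the project's API:

```rust
// Assumption-labelled sketch, not KeyExpiryManager itself: with no expiration
// duration configured nothing expires; otherwise a key expires once its event
// timestamp falls behind `now` by more than the duration.
fn is_expired(event_ts: i64, key_expiration_duration: Option<i64>, now: i64) -> bool {
    match key_expiration_duration {
        Some(duration) => event_ts + duration < now,
        None => false,
    }
}

fn main() {
    assert!(is_expired(0, Some(10), 11));
    assert!(!is_expired(5, Some(10), 11));
    assert!(!is_expired(0, None, i64::MAX));
}
```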

View File

@@ -31,16 +31,19 @@ use snafu::{OptionExt, ResultExt};
use crate::error::{InvalidRegionRequestSnafu, InvokeRegionServerSnafu, Result};
pub struct StandaloneDatanodeManager(pub RegionServer);
pub struct StandaloneDatanodeManager {
pub region_server: RegionServer,
pub flow_server: FlownodeRef,
}
#[async_trait]
impl NodeManager for StandaloneDatanodeManager {
async fn datanode(&self, _datanode: &Peer) -> DatanodeRef {
RegionInvoker::arc(self.0.clone())
RegionInvoker::arc(self.region_server.clone())
}
async fn flownode(&self, _node: &Peer) -> FlownodeRef {
unimplemented!()
self.flow_server.clone()
}
}

View File

@@ -24,8 +24,6 @@ pub struct DatanodeOptions {
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DatanodeClientOptions {
#[serde(with = "humantime_serde")]
pub timeout: Duration,
#[serde(with = "humantime_serde")]
pub connect_timeout: Duration,
pub tcp_nodelay: bool,
@@ -34,7 +32,6 @@ pub struct DatanodeClientOptions {
impl Default for DatanodeClientOptions {
fn default() -> Self {
Self {
timeout: Duration::from_secs(channel_manager::DEFAULT_GRPC_REQUEST_TIMEOUT_SECS),
connect_timeout: Duration::from_secs(
channel_manager::DEFAULT_GRPC_CONNECT_TIMEOUT_SECS,
),

View File

@@ -18,15 +18,15 @@ use async_trait::async_trait;
use crate::error::Result;
use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
use crate::metasrv::Context;
use crate::pubsub::{Message, PublishRef};
use crate::pubsub::{Message, PublisherRef};
pub struct PublishHeartbeatHandler {
publish: PublishRef,
publisher: PublisherRef,
}
impl PublishHeartbeatHandler {
pub fn new(publish: PublishRef) -> PublishHeartbeatHandler {
PublishHeartbeatHandler { publish }
pub fn new(publisher: PublisherRef) -> PublishHeartbeatHandler {
PublishHeartbeatHandler { publisher }
}
}
@@ -43,7 +43,7 @@ impl HeartbeatHandler for PublishHeartbeatHandler {
_: &mut HeartbeatAccumulator,
) -> Result<HandleControl> {
let msg = Message::Heartbeat(Box::new(req.clone()));
self.publish.send_msg(msg).await;
self.publisher.publish(msg).await;
Ok(HandleControl::Continue)
}

View File

@@ -53,7 +53,7 @@ use crate::handler::HeartbeatHandlerGroup;
use crate::lease::lookup_alive_datanode_peer;
use crate::lock::DistLockRef;
use crate::procedure::region_migration::manager::RegionMigrationManagerRef;
use crate::pubsub::{PublishRef, SubscribeManagerRef};
use crate::pubsub::{PublisherRef, SubscriptionManagerRef};
use crate::selector::{Selector, SelectorType};
use crate::service::mailbox::MailboxRef;
use crate::service::store::cached_kv::LeaderCachedKvBackend;
@@ -256,7 +256,7 @@ pub type ElectionRef = Arc<dyn Election<Leader = LeaderValue>>;
pub struct MetaStateHandler {
procedure_manager: ProcedureManagerRef,
wal_options_allocator: WalOptionsAllocatorRef,
subscribe_manager: Option<SubscribeManagerRef>,
subscribe_manager: Option<SubscriptionManagerRef>,
greptimedb_telemetry_task: Arc<GreptimeDBTelemetryTask>,
leader_cached_kv_backend: Arc<LeaderCachedKvBackend>,
state: StateRef,
@@ -295,7 +295,7 @@ impl MetaStateHandler {
if let Some(sub_manager) = self.subscribe_manager.clone() {
info!("Leader changed, un_subscribe all");
if let Err(e) = sub_manager.un_subscribe_all() {
if let Err(e) = sub_manager.unsubscribe_all() {
error!("Failed to un_subscribe all, error: {}", e);
}
}
@@ -351,7 +351,7 @@ impl Metasrv {
let procedure_manager = self.procedure_manager.clone();
let in_memory = self.in_memory.clone();
let leader_cached_kv_backend = self.leader_cached_kv_backend.clone();
let subscribe_manager = self.subscribe_manager();
let subscribe_manager = self.subscription_manager();
let mut rx = election.subscribe_leader_change();
let greptimedb_telemetry_task = self.greptimedb_telemetry_task.clone();
greptimedb_telemetry_task
@@ -540,12 +540,12 @@ impl Metasrv {
&self.region_migration_manager
}
pub fn publish(&self) -> Option<PublishRef> {
self.plugins.get::<PublishRef>()
pub fn publish(&self) -> Option<PublisherRef> {
self.plugins.get::<PublisherRef>()
}
pub fn subscribe_manager(&self) -> Option<SubscribeManagerRef> {
self.plugins.get::<SubscribeManagerRef>()
pub fn subscription_manager(&self) -> Option<SubscriptionManagerRef> {
self.plugins.get::<SubscriptionManagerRef>()
}
pub fn plugins(&self) -> &Plugins {

View File

@@ -66,7 +66,7 @@ use crate::metasrv::{
use crate::procedure::region_failover::RegionFailoverManager;
use crate::procedure::region_migration::manager::RegionMigrationManager;
use crate::procedure::region_migration::DefaultContextFactory;
use crate::pubsub::PublishRef;
use crate::pubsub::PublisherRef;
use crate::selector::lease_based::LeaseBasedSelector;
use crate::service::mailbox::MailboxRef;
use crate::service::store::cached_kv::LeaderCachedKvBackend;
@@ -320,7 +320,7 @@ impl MetasrvBuilder {
let publish_heartbeat_handler = plugins
.clone()
.and_then(|plugins| plugins.get::<PublishRef>())
.and_then(|plugins| plugins.get::<PublisherRef>())
.map(|publish| PublishHeartbeatHandler::new(publish.clone()));
let region_lease_handler = RegionLeaseHandler::new(

View File

@@ -20,10 +20,10 @@ mod subscriber;
#[cfg(test)]
mod tests;
pub use publish::{DefaultPublish, Publish, PublishRef};
pub use publish::{DefaultPublisher, Publisher, PublisherRef};
pub use subscribe_manager::{
AddSubRequest, DefaultSubscribeManager, SubscribeManager, SubscribeManagerRef, SubscribeQuery,
UnSubRequest,
DefaultSubscribeManager, SubscribeRequest, SubscriptionManager, SubscriptionManagerRef,
SubscriptionQuery, UnsubscribeRequest,
};
pub use subscriber::{Subscriber, SubscriberRef, Transport};

View File

@@ -18,53 +18,53 @@ use std::sync::Arc;
use common_telemetry::error;
use crate::pubsub::{Message, SubscribeManager, Transport, UnSubRequest};
use crate::pubsub::{Message, SubscriptionManager, Transport, UnsubscribeRequest};
/// This trait provides a `send_msg` method that can be used by other modules
/// This trait provides a `publish` method that can be used by other modules
/// of meta to publish [Message].
#[async_trait::async_trait]
pub trait Publish: Send + Sync {
async fn send_msg(&self, message: Message);
pub trait Publisher: Send + Sync {
async fn publish(&self, message: Message);
}
pub type PublishRef = Arc<dyn Publish>;
pub type PublisherRef = Arc<dyn Publisher>;
/// The default implementation of [Publish]
pub struct DefaultPublish<M, T> {
subscribe_manager: Arc<M>,
/// The default implementation of [Publisher]
pub struct DefaultPublisher<M, T> {
subscription_manager: Arc<M>,
_transport: PhantomData<T>,
}
impl<M, T> DefaultPublish<M, T> {
pub fn new(subscribe_manager: Arc<M>) -> Self {
impl<M, T> DefaultPublisher<M, T> {
pub fn new(subscription_manager: Arc<M>) -> Self {
Self {
subscribe_manager,
subscription_manager,
_transport: PhantomData,
}
}
}
#[async_trait::async_trait]
impl<M, T> Publish for DefaultPublish<M, T>
impl<M, T> Publisher for DefaultPublisher<M, T>
where
M: SubscribeManager<T>,
M: SubscriptionManager<T>,
T: Transport + Debug,
{
async fn send_msg(&self, message: Message) {
let sub_list = self
.subscribe_manager
async fn publish(&self, message: Message) {
let subscribers = self
.subscription_manager
.subscribers_by_topic(&message.topic());
for sub in sub_list {
if sub.transport_msg(message.clone()).await.is_err() {
for subscriber in subscribers {
if subscriber.transport_msg(message.clone()).await.is_err() {
// If an error occurs, we consider the subscriber offline,
// so un_subscribe here.
let req = UnSubRequest {
subscriber_id: sub.id(),
let req = UnsubscribeRequest {
subscriber_id: subscriber.id(),
};
if let Err(e) = self.subscribe_manager.un_subscribe(req.clone()) {
error!(e; "failed to un_subscribe, req: {:?}", req);
if let Err(e) = self.subscription_manager.unsubscribe(req.clone()) {
error!(e; "failed to unsubscribe, req: {:?}", req);
}
}
}

View File

@@ -21,94 +21,92 @@ use tokio::sync::mpsc::Sender;
use crate::error::Result;
use crate::pubsub::{Message, Subscriber, SubscriberRef, Topic, Transport};
pub trait SubscribeQuery<T>: Send + Sync {
pub trait SubscriptionQuery<T>: Send + Sync {
fn subscribers_by_topic(&self, topic: &Topic) -> Vec<SubscriberRef<T>>;
}
pub trait SubscribeManager<T>: SubscribeQuery<T> {
fn subscribe(&self, req: AddSubRequest<T>) -> Result<()>;
pub trait SubscriptionManager<T>: SubscriptionQuery<T> {
fn subscribe(&self, req: SubscribeRequest<T>) -> Result<()>;
fn un_subscribe(&self, req: UnSubRequest) -> Result<()>;
fn unsubscribe(&self, req: UnsubscribeRequest) -> Result<()>;
fn un_subscribe_all(&self) -> Result<()>;
fn unsubscribe_all(&self) -> Result<()>;
}
pub type SubscribeManagerRef = Arc<dyn SubscribeManager<Sender<Message>>>;
pub type SubscriptionManagerRef = Arc<dyn SubscriptionManager<Sender<Message>>>;
pub struct AddSubRequest<T> {
pub topic_list: Vec<Topic>,
pub struct SubscribeRequest<T> {
pub topics: Vec<Topic>,
pub subscriber: Subscriber<T>,
}
#[derive(Debug, Clone)]
pub struct UnSubRequest {
pub struct UnsubscribeRequest {
pub subscriber_id: u32,
}
pub struct DefaultSubscribeManager<T> {
topic2sub: DashMap<Topic, Vec<Arc<Subscriber<T>>>>,
topic_to_subscribers: DashMap<Topic, Vec<Arc<Subscriber<T>>>>,
}
impl<T> Default for DefaultSubscribeManager<T> {
fn default() -> Self {
Self {
topic2sub: DashMap::new(),
topic_to_subscribers: DashMap::new(),
}
}
}
impl<T> SubscribeQuery<T> for DefaultSubscribeManager<T>
impl<T> SubscriptionQuery<T> for DefaultSubscribeManager<T>
where
T: Transport,
{
fn subscribers_by_topic(&self, topic: &Topic) -> Vec<SubscriberRef<T>> {
self.topic2sub
self.topic_to_subscribers
.get(topic)
.map(|list_ref| list_ref.clone())
.unwrap_or_default()
}
}
impl<T> SubscribeManager<T> for DefaultSubscribeManager<T>
impl<T> SubscriptionManager<T> for DefaultSubscribeManager<T>
where
T: Transport,
{
fn subscribe(&self, req: AddSubRequest<T>) -> Result<()> {
let AddSubRequest {
topic_list,
subscriber,
} = req;
fn subscribe(&self, req: SubscribeRequest<T>) -> Result<()> {
let SubscribeRequest { topics, subscriber } = req;
info!(
"Add a subscription, subscriber_id: {}, subscriber_name: {}, topic list: {:?}",
"Add a subscriber, subscriber_id: {}, subscriber_name: {}, topics: {:?}",
subscriber.id(),
subscriber.name(),
topic_list
topics
);
let subscriber = Arc::new(subscriber);
for topic in topic_list {
let mut entry = self.topic2sub.entry(topic).or_default();
for topic in topics {
let mut entry = self.topic_to_subscribers.entry(topic).or_default();
entry.push(subscriber.clone());
}
Ok(())
}
fn un_subscribe(&self, req: UnSubRequest) -> Result<()> {
let UnSubRequest { subscriber_id } = req;
fn unsubscribe(&self, req: UnsubscribeRequest) -> Result<()> {
let UnsubscribeRequest { subscriber_id } = req;
info!("Add a un_subscription, subscriber_id: {}", subscriber_id);
info!("Remove a subscriber, subscriber_id: {}", subscriber_id);
for mut sub_list in self.topic2sub.iter_mut() {
sub_list.retain(|subscriber| subscriber.id() != subscriber_id)
for mut subscribers in self.topic_to_subscribers.iter_mut() {
subscribers.retain(|subscriber| subscriber.id() != subscriber_id)
}
Ok(())
}
fn un_subscribe_all(&self) -> Result<()> {
self.topic2sub.clear();
fn unsubscribe_all(&self) -> Result<()> {
self.topic_to_subscribers.clear();
Ok(())
}

View File

@@ -19,8 +19,8 @@ use tokio::sync::mpsc::{Receiver, Sender};
use super::DefaultSubscribeManager;
use crate::pubsub::{
AddSubRequest, DefaultPublish, Message, Publish, SubscribeManager, SubscribeQuery, Subscriber,
Topic, UnSubRequest,
DefaultPublisher, Message, Publisher, SubscribeRequest, Subscriber, SubscriptionManager,
SubscriptionQuery, Topic, UnsubscribeRequest,
};
#[tokio::test]
@@ -28,15 +28,15 @@ async fn test_pubsub() {
let manager = Arc::new(DefaultSubscribeManager::default());
let (subscriber1, mut rx1) = mock_subscriber(1, "tidigong");
let req = AddSubRequest {
topic_list: vec![Topic::Heartbeat],
let req = SubscribeRequest {
topics: vec![Topic::Heartbeat],
subscriber: subscriber1,
};
manager.subscribe(req).unwrap();
let (subscriber2, mut rx2) = mock_subscriber(2, "gcrm");
let req = AddSubRequest {
topic_list: vec![Topic::Heartbeat],
let req = SubscribeRequest {
topics: vec![Topic::Heartbeat],
subscriber: subscriber2,
};
manager.subscribe(req).unwrap();
@@ -44,10 +44,10 @@ async fn test_pubsub() {
let manager_clone = manager.clone();
let message_number: usize = 5;
tokio::spawn(async move {
let publisher: DefaultPublish<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
DefaultPublish::new(manager_clone);
let publisher: DefaultPublisher<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
DefaultPublisher::new(manager_clone);
for _ in 0..message_number {
publisher.send_msg(mock_message()).await;
publisher.publish(mock_message()).await;
}
});
@@ -59,12 +59,12 @@ async fn test_pubsub() {
}
manager
.un_subscribe(UnSubRequest { subscriber_id: 1 })
.unsubscribe(UnsubscribeRequest { subscriber_id: 1 })
.unwrap();
let may_msg = rx1.recv().await;
assert!(may_msg.is_none());
manager.un_subscribe_all().unwrap();
manager.unsubscribe_all().unwrap();
let may_msg = rx2.recv().await;
assert!(may_msg.is_none());
}
@@ -74,15 +74,15 @@ async fn test_subscriber_disconnect() {
let manager = Arc::new(DefaultSubscribeManager::default());
let (subscriber1, rx1) = mock_subscriber(1, "tidigong");
let req = AddSubRequest {
topic_list: vec![Topic::Heartbeat],
let req = SubscribeRequest {
topics: vec![Topic::Heartbeat],
subscriber: subscriber1,
};
manager.subscribe(req).unwrap();
let (subscriber2, rx2) = mock_subscriber(2, "gcrm");
let req = AddSubRequest {
topic_list: vec![Topic::Heartbeat],
let req = SubscribeRequest {
topics: vec![Topic::Heartbeat],
subscriber: subscriber2,
};
manager.subscribe(req).unwrap();
@@ -90,10 +90,10 @@ async fn test_subscriber_disconnect() {
let manager_clone = manager.clone();
let message_number: usize = 5;
let join = tokio::spawn(async move {
let publisher: DefaultPublish<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
DefaultPublish::new(manager_clone);
let publisher: DefaultPublisher<DefaultSubscribeManager<Sender<Message>>, Sender<Message>> =
DefaultPublisher::new(manager_clone);
for _ in 0..message_number {
publisher.send_msg(mock_message()).await;
publisher.publish(mock_message()).await;
}
});
@@ -118,8 +118,8 @@ fn test_sub_manager() {
let manager = DefaultSubscribeManager::default();
let subscriber = mock_subscriber(1, "tidigong").0;
let req = AddSubRequest {
topic_list: vec![Topic::Heartbeat],
let req = SubscribeRequest {
topics: vec![Topic::Heartbeat],
subscriber,
};
manager.subscribe(req).unwrap();
@@ -127,21 +127,21 @@ fn test_sub_manager() {
assert_eq!(1, ret.len());
let subscriber = mock_subscriber(2, "gcrm").0;
let req = AddSubRequest {
topic_list: vec![Topic::Heartbeat],
let req = SubscribeRequest {
topics: vec![Topic::Heartbeat],
subscriber,
};
manager.subscribe(req).unwrap();
let ret = manager.subscribers_by_topic(&Topic::Heartbeat);
assert_eq!(2, ret.len());
let req = UnSubRequest { subscriber_id: 1 };
manager.un_subscribe(req).unwrap();
let req = UnsubscribeRequest { subscriber_id: 1 };
manager.unsubscribe(req).unwrap();
let ret = manager.subscribers_by_topic(&Topic::Heartbeat);
assert_eq!(1, ret.len());
let req = UnSubRequest { subscriber_id: 2 };
manager.un_subscribe(req).unwrap();
let req = UnsubscribeRequest { subscriber_id: 2 };
manager.unsubscribe(req).unwrap();
let ret = manager.subscribers_by_topic(&Topic::Heartbeat);
assert_eq!(0, ret.len());
}

View File

@@ -396,7 +396,7 @@ impl TwcsCompactionTask {
compacted_inputs.extend(output.inputs.iter().map(FileHandle::meta));
info!(
"Compaction region {} output [{}]-> {}",
"Compaction region {}. Input [{}] -> output {}",
self.region_id,
output
.inputs

View File

@@ -18,7 +18,7 @@ use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use common_telemetry::{error, info};
use common_telemetry::{debug, error, info};
use smallvec::SmallVec;
use snafu::ResultExt;
use store_api::storage::RegionId;
@@ -118,7 +118,7 @@ impl WriteBufferManager for WriteBufferManagerImpl {
fn should_flush_engine(&self) -> bool {
let mutable_memtable_memory_usage = self.memory_active.load(Ordering::Relaxed);
if mutable_memtable_memory_usage > self.mutable_limit {
info!(
debug!(
"Engine should flush (over mutable limit), mutable_usage: {}, memory_usage: {}, mutable_limit: {}, global_limit: {}",
mutable_memtable_memory_usage, self.memory_usage(), self.mutable_limit, self.global_write_buffer_size,
);
@@ -132,7 +132,7 @@ impl WriteBufferManager for WriteBufferManagerImpl {
if memory_usage >= self.global_write_buffer_size
&& mutable_memtable_memory_usage >= self.global_write_buffer_size / 2
{
info!(
debug!(
"Engine should flush (over total limit), memory_usage: {}, global_write_buffer_size: {}, \
mutable_usage: {}.",
memory_usage,

View File

@@ -85,8 +85,8 @@ impl<S> RegionWorkerLoop<S> {
let mut max_mem_region = None;
for region in &regions {
if self.flush_scheduler.is_flush_requested(region.region_id) {
// Already flushing.
if self.flush_scheduler.is_flush_requested(region.region_id) || !region.is_writable() {
// Already flushing or not writable.
continue;
}
@@ -134,8 +134,8 @@ impl<S> RegionWorkerLoop<S> {
let min_last_flush_time = now - self.config.auto_flush_interval.as_millis() as i64;
for region in &regions {
if self.flush_scheduler.is_flush_requested(region.region_id) {
// Already flushing.
if self.flush_scheduler.is_flush_requested(region.region_id) || !region.is_writable() {
// Already flushing or not writable.
continue;
}

View File

@@ -321,6 +321,7 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<
connection,
with,
table_name,
limit,
..
} = match stmt {
CopyTable::To(arg) => arg,
@@ -347,6 +348,7 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result<
pattern,
direction,
timestamp_range,
limit,
})
}

View File

@@ -24,7 +24,9 @@ use object_store::Entry;
use regex::Regex;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest};
use table::table_reference::TableReference;
use crate::error;
use crate::error::{CatalogSnafu, InvalidCopyDatabasePathSnafu};
@@ -65,11 +67,29 @@ impl StatementExecutor {
let mut exported_rows = 0;
for table_name in table_names {
// TODO(hl): also handles tables with metric engine.
// TODO(hl): remove this hardcode once we've removed numbers table.
if table_name == "numbers" {
continue;
}
let table = self
.get_table(&TableReference {
catalog: &req.catalog_name,
schema: &req.schema_name,
table: &table_name,
})
.await?;
// Ignores physical tables of metric engine.
if table.table_info().meta.engine == METRIC_ENGINE_NAME
&& !table
.table_info()
.meta
.options
.extra_options
.contains_key(LOGICAL_TABLE_METADATA_KEY)
{
continue;
}
let mut table_file = req.location.clone();
table_file.push_str(&table_name);
table_file.push_str(suffix);
@@ -90,6 +110,7 @@ impl StatementExecutor {
pattern: None,
direction: CopyDirection::Export,
timestamp_range: req.time_range,
limit: None,
},
ctx.clone(),
)
@@ -155,6 +176,7 @@ impl StatementExecutor {
pattern: None,
direction: CopyDirection::Import,
timestamp_range: None,
limit: None,
};
debug!("Copy table, arg: {:?}", req);
match self.copy_table_from(req, ctx.clone()).await {

View File

@@ -52,8 +52,6 @@ use crate::statement::StatementExecutor;
const DEFAULT_BATCH_SIZE: usize = 8192;
const DEFAULT_READ_BUFFER: usize = 256 * 1024;
const MAX_INSERT_ROWS: &str = "max_insert_rows";
const DEFAULT_MAX_INSERT_ROWS: usize = 1000;
enum FileMetadata {
Parquet {
@@ -379,11 +377,7 @@ impl StatementExecutor {
let mut rows_inserted = 0;
let mut insert_cost = 0;
let max_insert_rows = req
.with
.get(MAX_INSERT_ROWS)
.and_then(|val| val.parse::<usize>().ok())
.unwrap_or(DEFAULT_MAX_INSERT_ROWS);
let max_insert_rows = req.limit.map(|n| n as usize);
for (compat_schema, file_schema_projection, projected_table_schema, file_metadata) in files
{
let mut stream = self
@@ -435,8 +429,10 @@ impl StatementExecutor {
insert_cost += cost;
}
if rows_inserted >= max_insert_rows {
return Ok(gen_insert_output(rows_inserted, insert_cost));
if let Some(max_insert_rows) = max_insert_rows {
if rows_inserted >= max_insert_rows {
return Ok(gen_insert_output(rows_inserted, insert_cost));
}
}
}
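With the `max_insert_rows` table option gone, the loop above only stops early when the statement carried a `LIMIT`. A dependency-free sketch of that control flow (names are illustrative):

```rust
// Illustrative only: accumulate inserted rows batch by batch and stop once the
// optional LIMIT is reached, mirroring the loop above.
fn copy_rows(batch_sizes: &[usize], limit: Option<usize>) -> usize {
    let mut rows_inserted = 0;
    for &batch in batch_sizes {
        rows_inserted += batch;
        if let Some(max_insert_rows) = limit {
            if rows_inserted >= max_insert_rows {
                break;
            }
        }
    }
    rows_inserted
}

fn main() {
    assert_eq!(copy_rows(&[10, 10, 10], Some(15)), 20); // stops after the batch that crosses the limit
    assert_eq!(copy_rows(&[10, 10, 10], None), 30); // no LIMIT: copy everything
}
```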

View File

@@ -342,12 +342,16 @@ impl InstantManipulateStream {
// and the function `vectorSelectorSingle`
pub fn manipulate(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
let mut take_indices = vec![];
// TODO(ruihang): maybe the input is not timestamp millisecond array
let ts_column = input
.column(self.time_index)
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
.unwrap();
.ok_or_else(|| {
DataFusionError::Execution(
"Time index Column downcast to TimestampMillisecondArray failed".into(),
)
})?;
// field column for staleness check
let field_column = self

View File

@@ -250,12 +250,15 @@ pub struct SeriesNormalizeStream {
impl SeriesNormalizeStream {
pub fn normalize(&self, input: RecordBatch) -> DataFusionResult<RecordBatch> {
// TODO(ruihang): maybe the input is not timestamp millisecond array
let ts_column = input
.column(self.time_index)
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
.unwrap();
.ok_or_else(|| {
DataFusionError::Execution(
"Time index Column downcast to TimestampMillisecondArray failed".into(),
)
})?;
// bias the timestamp column by offset
let ts_column_biased = if self.offset == 0 {

View File

@@ -433,7 +433,7 @@ impl RangeManipulateStream {
pub fn manipulate(&self, input: RecordBatch) -> DataFusionResult<Option<RecordBatch>> {
let mut other_columns = (0..input.columns().len()).collect::<HashSet<_>>();
// calculate the range
let (aligned_ts, ranges) = self.calculate_range(&input);
let (aligned_ts, ranges) = self.calculate_range(&input)?;
// ignore this if all ranges are empty
if ranges.iter().all(|(_, len)| *len == 0) {
return Ok(None);
@@ -472,12 +472,19 @@ impl RangeManipulateStream {
.map_err(|e| DataFusionError::ArrowError(e, None))
}
fn calculate_range(&self, input: &RecordBatch) -> (ArrayRef, Vec<(u32, u32)>) {
fn calculate_range(
&self,
input: &RecordBatch,
) -> DataFusionResult<(ArrayRef, Vec<(u32, u32)>)> {
let ts_column = input
.column(self.time_index)
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
.unwrap();
.ok_or_else(|| {
DataFusionError::Execution(
"Time index Column downcast to TimestampMillisecondArray failed".into(),
)
})?;
let mut aligned_ts = vec![];
let mut ranges = vec![];
@@ -506,7 +513,7 @@ impl RangeManipulateStream {
let aligned_ts_array = Arc::new(TimestampMillisecondArray::from(aligned_ts)) as _;
(aligned_ts_array, ranges)
Ok((aligned_ts_array, ranges))
}
}
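All three PromQL streams above replace `.unwrap()` on the timestamp downcast with `ok_or_else`, so a schema mismatch surfaces as an execution error instead of a panic. A dependency-free sketch of that pattern, with a stand-in error type rather than `DataFusionError::Execution`:

```rust
// Illustrative only: convert a failed downcast (an Option) into an error
// rather than panicking, as the ok_or_else calls above do.
#[derive(Debug)]
struct ExecutionError(String);

fn require_ms_array(downcast: Option<&[i64]>) -> Result<&[i64], ExecutionError> {
    downcast.ok_or_else(|| {
        ExecutionError("Time index Column downcast to TimestampMillisecondArray failed".into())
    })
}

fn main() {
    assert!(require_ms_array(None).is_err());
    let data: [i64; 3] = [1, 2, 3];
    assert_eq!(require_ms_array(Some(&data[..])).unwrap(), &data[..]);
}
```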

View File

@@ -35,7 +35,8 @@ use datafusion::prelude::{Column, Expr as DfExpr, JoinType};
use datafusion::scalar::ScalarValue;
use datafusion::sql::TableReference;
use datafusion_expr::utils::conjunction;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
use datatypes::data_type::ConcreteDataType;
use itertools::Itertools;
use promql_parser::label::{MatchOp, Matcher, Matchers, METRIC_NAME};
use promql_parser::parser::{
@@ -910,9 +911,62 @@ impl PromPlanner {
.resolve_table(table_ref.clone())
.await
.context(CatalogSnafu)?;
// Safety: `scan_filters` is not empty.
let result = LogicalPlanBuilder::scan(table_ref, provider, None)
let is_time_index_ms = provider
.as_any()
.downcast_ref::<DefaultTableSource>()
.context(UnknownTableSnafu)?
.table_provider
.as_any()
.downcast_ref::<DfTableProviderAdapter>()
.context(UnknownTableSnafu)?
.table()
.schema()
.timestamp_column()
.with_context(|| TimeIndexNotFoundSnafu {
table: table_ref.to_quoted_string(),
})?
.data_type
== ConcreteDataType::timestamp_millisecond_datatype();
let mut scan_plan = LogicalPlanBuilder::scan(table_ref.clone(), provider, None)
.context(DataFusionPlanningSnafu)?
.build()
.context(DataFusionPlanningSnafu)?;
if !is_time_index_ms {
// cast to ms if the time index is not in millisecond precision
let expr: Vec<_> = self
.ctx
.field_columns
.iter()
.map(|col| DfExpr::Column(Column::new(Some(table_ref.clone()), col.clone())))
.chain(self.create_tag_column_exprs()?)
.chain(Some(DfExpr::Alias(Alias {
expr: Box::new(DfExpr::Cast(Cast {
expr: Box::new(self.create_time_index_column_expr()?),
data_type: ArrowDataType::Timestamp(ArrowTimeUnit::Millisecond, None),
})),
relation: Some(table_ref.clone()),
name: self
.ctx
.time_index_column
.as_ref()
.with_context(|| TimeIndexNotFoundSnafu {
table: table_ref.to_quoted_string(),
})?
.clone(),
})))
.collect::<Vec<_>>();
scan_plan = LogicalPlanBuilder::from(scan_plan)
.project(expr)
.context(DataFusionPlanningSnafu)?
.build()
.context(DataFusionPlanningSnafu)?;
}
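The projection added above casts a non-millisecond time index to millisecond precision before the PromQL-specific plan nodes run. As a hedged arithmetic note (assuming the usual truncating unit conversion), a nanosecond timestamp maps to milliseconds by integer division:

```rust
// Illustrative arithmetic only, assuming the cast truncates sub-millisecond digits:
// a Timestamp(Nanosecond) value divided by 1_000_000 gives the Millisecond value.
fn ns_to_ms(ts_ns: i64) -> i64 {
    ts_ns / 1_000_000
}

fn main() {
    assert_eq!(ns_to_ms(1_625_097_600_123_456_789), 1_625_097_600_123);
}
```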
// Safety: `scan_filters` is not empty.
let result = LogicalPlanBuilder::from(scan_plan)
.filter(conjunction(filter).unwrap())
.context(DataFusionPlanningSnafu)?
.build()
@@ -2972,4 +3026,99 @@ mod test {
assert!(plan.is_err(), "query: {:?}", query);
}
}
#[tokio::test]
async fn test_non_ms_precision() {
let catalog_list = MemoryCatalogManager::with_default_setup();
let columns = vec![
ColumnSchema::new(
"tag".to_string(),
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
"timestamp".to_string(),
ConcreteDataType::timestamp_nanosecond_datatype(),
false,
)
.with_time_index(true),
ColumnSchema::new(
"field".to_string(),
ConcreteDataType::float64_datatype(),
true,
),
];
let schema = Arc::new(Schema::new(columns));
let table_meta = TableMetaBuilder::default()
.schema(schema)
.primary_key_indices(vec![0])
.value_indices(vec![2])
.next_column_id(1024)
.build()
.unwrap();
let table_info = TableInfoBuilder::default()
.name("metrics".to_string())
.meta(table_meta)
.build()
.unwrap();
let table = EmptyTable::from_table_info(&table_info);
assert!(catalog_list
.register_table_sync(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "metrics".to_string(),
table_id: 1024,
table,
})
.is_ok());
let plan = PromPlanner::stmt_to_plan(
DfTableSourceProvider::new(catalog_list.clone(), false, QueryContext::arc().as_ref()),
EvalStmt {
expr: parser::parse("metrics{tag = \"1\"}").unwrap(),
start: UNIX_EPOCH,
end: UNIX_EPOCH
.checked_add(Duration::from_secs(100_000))
.unwrap(),
interval: Duration::from_secs(5),
lookback_delta: Duration::from_secs(1),
},
)
.await
.unwrap();
assert_eq!(plan.display_indent_schema().to_string(),
"PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n Sort: metrics.tag DESC NULLS LAST, metrics.timestamp DESC NULLS LAST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-1000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(Millisecond, None)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
);
let plan = PromPlanner::stmt_to_plan(
DfTableSourceProvider::new(catalog_list.clone(), false, QueryContext::arc().as_ref()),
EvalStmt {
expr: parser::parse("avg_over_time(metrics{tag = \"1\"}[5s])").unwrap(),
start: UNIX_EPOCH,
end: UNIX_EPOCH
.checked_add(Duration::from_secs(100_000))
.unwrap(),
interval: Duration::from_secs(5),
lookback_delta: Duration::from_secs(1),
},
)
.await
.unwrap();
assert_eq!(plan.display_indent_schema().to_string(),
"Filter: prom_avg_over_time(timestamp_range,field) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
\n Projection: metrics.timestamp, prom_avg_over_time(timestamp_range, field) AS prom_avg_over_time(timestamp_range,field), metrics.tag [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
\n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[5000], time index=[timestamp], values=[\"field\"] [field:Dictionary(Int64, Float64);N, tag:Utf8, timestamp:Timestamp(Millisecond, None), timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n Sort: metrics.tag DESC NULLS LAST, metrics.timestamp DESC NULLS LAST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-6000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(Millisecond, None)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
);
}
}
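The projection added above rewrites a non-millisecond time index to `Timestamp(Millisecond, None)` before the Prom* plan nodes run. A minimal sketch of exercising it through TQL, assuming `TIMESTAMP(9)` declares a nanosecond time index and `host_nano` is a hypothetical table (the sqlness test near the end of this diff covers the second- and microsecond-precision cases):

-- hypothetical nanosecond-precision table; the planner inserts CAST(ts AS Timestamp(Millisecond, None))
CREATE TABLE host_nano (ts TIMESTAMP(9) TIME INDEX, host STRING PRIMARY KEY, val DOUBLE);
TQL EVAL (0, 15, '5s') host_nano{host="host1"};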

View File

@@ -1116,9 +1116,11 @@ impl RangeSelectStream {
let ts_column_ref = ts_column
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
.ok_or(DataFusionError::Execution(
"Time index Column downcast to TimestampMillisecondArray failed".into(),
))?;
.ok_or_else(|| {
DataFusionError::Execution(
"Time index Column downcast to TimestampMillisecondArray failed".into(),
)
})?;
for i in 0..self.range_exec.len() {
let args = self.evaluate_many(&batch, &self.range_exec[i].args)?;
// use self.modify_map to record (hash, align_ts) => [row_nums]

View File

@@ -19,6 +19,7 @@ use std::time::Duration;
use arrow_schema::DataType;
use async_recursion::async_recursion;
use catalog::table_source::DfTableSourceProvider;
use chrono::Utc;
use common_time::interval::NANOS_PER_MILLI;
use common_time::timestamp::TimeUnit;
use common_time::{Interval, Timestamp, Timezone};
@@ -27,10 +28,13 @@ use datafusion::prelude::Column;
use datafusion::scalar::ScalarValue;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{DFSchema, DataFusionError, Result as DFResult};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::{
Aggregate, Analyze, Explain, Expr, ExprSchemable, Extension, LogicalPlan, LogicalPlanBuilder,
Projection,
};
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datatypes::prelude::ConcreteDataType;
use promql_parser::util::parse_duration;
use session::context::QueryContextRef;
@@ -108,34 +112,84 @@ fn parse_expr_to_string(args: &[Expr], i: usize) -> DFResult<String> {
/// Parse a duration expr:
/// 1. duration string (e.g. `'1h'`)
/// 2. Interval expr (e.g. `INTERVAL '1 year 3 hours 20 minutes'`)
/// 3. An interval expr that can be evaluated at the logical plan stage (e.g. `INTERVAL '2' day - INTERVAL '1' day`)
fn parse_duration_expr(args: &[Expr], i: usize) -> DFResult<Duration> {
let interval_to_duration = |interval: Interval| -> Duration {
Duration::from_millis((interval.to_nanosecond() / NANOS_PER_MILLI as i128) as u64)
};
match args.get(i) {
Some(Expr::Literal(ScalarValue::Utf8(Some(str)))) => {
parse_duration(str).map_err(DataFusionError::Plan)
}
Some(Expr::Literal(ScalarValue::IntervalYearMonth(Some(i)))) => {
Ok(interval_to_duration(Interval::from_i32(*i)))
Some(expr) => {
let ms = evaluate_expr_to_millisecond(args, i, true)?;
if ms <= 0 {
return Err(dispose_parse_error(Some(expr)));
}
Ok(Duration::from_millis(ms as u64))
}
Some(Expr::Literal(ScalarValue::IntervalDayTime(Some(i)))) => {
Ok(interval_to_duration(Interval::from_i64(*i)))
}
Some(Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(i)))) => {
Ok(interval_to_duration(Interval::from_i128(*i)))
}
other => Err(dispose_parse_error(other)),
None => Err(dispose_parse_error(None)),
}
}
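For reference, the three accepted duration forms correspond to range queries like the following; a sketch assuming a `host` table with a `val` field, mirroring the range sqlness cases later in this diff:

-- 1. duration string
SELECT min(val) RANGE '5s' FROM host ALIGN '5s';
-- 2. interval expression
SELECT min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day);
-- 3. interval expression evaluated at the logical plan stage
SELECT min(val) RANGE (INTERVAL '2' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '2' day - INTERVAL '1' day);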
/// Evaluate a time calculation expr, cases like:
/// 1. `INTERVAL '1' day + INTERVAL '1 year 2 hours 3 minutes'`
/// 2. `now() - INTERVAL '1' day` (when `interval_only==false`)
///
/// Outputs a millisecond timestamp.
///
/// If `interval_only==true`, only accept exprs composed entirely of interval types (case 2 will return an error).
fn evaluate_expr_to_millisecond(args: &[Expr], i: usize, interval_only: bool) -> DFResult<i64> {
let Some(expr) = args.get(i) else {
return Err(dispose_parse_error(None));
};
if interval_only && !interval_only_in_expr(expr) {
return Err(dispose_parse_error(Some(expr)));
}
let execution_props = ExecutionProps::new().with_query_execution_start_time(Utc::now());
let info = SimplifyContext::new(&execution_props).with_schema(Arc::new(DFSchema::empty()));
let interval_to_ms =
|interval: Interval| -> i64 { (interval.to_nanosecond() / NANOS_PER_MILLI as i128) as i64 };
let simplify_expr = ExprSimplifier::new(info).simplify(expr.clone())?;
match simplify_expr {
Expr::Literal(ScalarValue::TimestampNanosecond(ts_nanos, _))
| Expr::Literal(ScalarValue::DurationNanosecond(ts_nanos)) => {
ts_nanos.map(|v| v / 1_000_000)
}
Expr::Literal(ScalarValue::TimestampMicrosecond(ts_micros, _))
| Expr::Literal(ScalarValue::DurationMicrosecond(ts_micros)) => {
ts_micros.map(|v| v / 1_000)
}
Expr::Literal(ScalarValue::TimestampMillisecond(ts_millis, _))
| Expr::Literal(ScalarValue::DurationMillisecond(ts_millis)) => ts_millis,
Expr::Literal(ScalarValue::TimestampSecond(ts_secs, _))
| Expr::Literal(ScalarValue::DurationSecond(ts_secs)) => ts_secs.map(|v| v * 1_000),
Expr::Literal(ScalarValue::IntervalYearMonth(interval)) => {
interval.map(|v| interval_to_ms(Interval::from_i32(v)))
}
Expr::Literal(ScalarValue::IntervalDayTime(interval)) => {
interval.map(|v| interval_to_ms(Interval::from_i64(v)))
}
Expr::Literal(ScalarValue::IntervalMonthDayNano(interval)) => {
interval.map(|v| interval_to_ms(Interval::from_i128(v)))
}
_ => None,
}
.ok_or_else(|| {
DataFusionError::Plan(format!(
"{} is not a expr can be evaluate and use in range query",
expr.display_name().unwrap_or_default()
))
})
}
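As a worked example: `INTERVAL '2' day - INTERVAL '1' day` simplifies to a one-day interval, i.e. 24 * 60 * 60 * 1000 = 86,400,000 ms. A `now()`-based expression is only accepted when `interval_only == false`, so it works for ALIGN TO but is rejected as a RANGE duration; a sketch assuming the same `host` table as the range sqlness tests below:

-- rejected as a duration: `now()` makes the expression non-interval-only
SELECT ts, min(val) RANGE (now() - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;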
/// Parse the `align to` clause and return a UTC timestamp in milliseconds,
/// which is used as the basis for dividing time slots during the align operation.
/// 1. NOW: align to the current execution time
/// 2. Timestamp string: align to a specific timestamp
/// 3. leave empty (as Default Option): align to unix epoch 0 (timezone aware)
/// 3. An expr that can be evaluated at the logical plan stage (e.g. `now() - INTERVAL '1' day`)
/// 4. leave empty (as Default Option): align to unix epoch 0 (timezone aware)
fn parse_align_to(args: &[Expr], i: usize, timezone: Option<&Timezone>) -> DFResult<i64> {
let s = parse_str_expr(args, i)?;
let Ok(s) = parse_str_expr(args, i) else {
return evaluate_expr_to_millisecond(args, i, false);
};
let upper = s.to_uppercase();
match upper.as_str() {
"NOW" => return Ok(Timestamp::current_millis().value()),
@@ -469,6 +523,25 @@ fn have_range_in_exprs(exprs: &[Expr]) -> bool {
})
}
fn interval_only_in_expr(expr: &Expr) -> bool {
let mut all_interval = true;
let _ = expr.apply(&mut |expr| {
if !matches!(
expr,
Expr::Literal(ScalarValue::IntervalDayTime(_))
| Expr::Literal(ScalarValue::IntervalMonthDayNano(_))
| Expr::Literal(ScalarValue::IntervalYearMonth(_))
| Expr::BinaryExpr(_)
) {
all_interval = false;
Ok(TreeNodeRecursion::Stop)
} else {
Ok(TreeNodeRecursion::Continue)
}
});
all_interval
}
#[cfg(test)]
mod test {
@@ -477,6 +550,7 @@ mod test {
use catalog::memory::MemoryCatalogManager;
use catalog::RegisterTableRequest;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datafusion_expr::{BinaryExpr, Operator};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use session::context::QueryContext;
@@ -754,8 +828,42 @@ mod test {
parse_duration_expr(&args, 0).unwrap(),
parse_duration("1y4w").unwrap()
);
// test err
// test index err
assert!(parse_duration_expr(&args, 10).is_err());
// test evaluate expr
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
op: Operator::Plus,
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
})];
assert_eq!(
parse_duration_expr(&args, 0).unwrap().as_millis(),
interval_to_ms(Interval::from_year_month(20))
);
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
})];
// test zero interval error
assert!(parse_duration_expr(&args, 0).is_err());
// test that the expr must be interval-only
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::Time64Microsecond(Some(0)))),
})];
assert!(parse_duration_expr(&args, 0).is_err());
}
#[test]
@@ -787,19 +895,56 @@ mod test {
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
"1970-01-01T00:00:00+08:00".into(),
)))];
assert!(parse_align_to(&args, 0, None).unwrap() == -8 * 60 * 60 * 1000);
assert_eq!(parse_align_to(&args, 0, None).unwrap(), -8 * 60 * 60 * 1000);
// timezone
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
"1970-01-01T00:00:00".into(),
)))];
assert!(
assert_eq!(
parse_align_to(
&args,
0,
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
)
.unwrap()
== -8 * 60 * 60 * 1000
.unwrap(),
-8 * 60 * 60 * 1000
);
// test evaluate expr
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
op: Operator::Plus,
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
})];
assert_eq!(
parse_align_to(&args, 0, None).unwrap(),
// 20 months
20 * 30 * 24 * 60 * 60 * 1000
);
}
#[test]
fn test_interval_only() {
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::DurationMillisecond(Some(20)))),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
});
assert!(!interval_only_in_expr(&expr));
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
Interval::from_year_month(10).to_i32(),
)))),
});
assert!(interval_only_in_expr(&expr));
}
}

View File

@@ -24,6 +24,7 @@ use sql::dialect::GreptimeDbDialect;
use sql::parser::ParserContext;
use sql::statements::create::{CreateTable, TIME_INDEX};
use sql::statements::{self, OptionMap};
use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column};
use table::metadata::{TableInfoRef, TableMeta};
use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY};
@@ -96,6 +97,7 @@ fn create_column_def(column_schema: &ColumnSchema, quote_style: char) -> Result<
}
fn create_table_constraints(
engine: &str,
schema: &SchemaRef,
table_meta: &TableMeta,
quote_style: char,
@@ -111,9 +113,16 @@ fn create_table_constraints(
});
}
if !table_meta.primary_key_indices.is_empty() {
let is_metric_engine = is_metric_engine(engine);
let columns = table_meta
.row_key_column_names()
.map(|name| Ident::with_quote(quote_style, name))
.flat_map(|name| {
if is_metric_engine && is_metric_engine_internal_column(name) {
None
} else {
Some(Ident::with_quote(quote_style, name))
}
})
.collect();
constraints.push(TableConstraint::Unique {
name: None,
@@ -131,14 +140,20 @@ pub fn create_table_stmt(table_info: &TableInfoRef, quote_style: char) -> Result
let table_meta = &table_info.meta;
let table_name = &table_info.name;
let schema = &table_info.meta.schema;
let is_metric_engine = is_metric_engine(&table_meta.engine);
let columns = schema
.column_schemas()
.iter()
.map(|c| create_column_def(c, quote_style))
.filter_map(|c| {
if is_metric_engine && is_metric_engine_internal_column(&c.name) {
None
} else {
Some(create_column_def(c, quote_style))
}
})
.collect::<Result<Vec<_>>>()?;
let constraints = create_table_constraints(schema, table_meta, quote_style);
let constraints = create_table_constraints(&table_meta.engine, schema, table_meta, quote_style);
Ok(CreateTable {
if_not_exists: true,

View File

@@ -308,6 +308,71 @@ mod tests {
}
}
#[test]
fn test_parse_alter_change_column_alias_type() {
let sql_1 = "ALTER TABLE my_metric_1 MODIFY COLUMN a MediumText";
let mut result_1 = ParserContext::create_with_dialect(
sql_1,
&GreptimeDbDialect {},
ParseOptions::default(),
)
.unwrap();
match result_1.remove(0) {
Statement::Alter(alter_table) => {
assert_eq!("my_metric_1", alter_table.table_name().0[0].value);
let alter_operation = alter_table.alter_operation();
assert_matches!(
alter_operation,
AlterTableOperation::ChangeColumnType { .. }
);
match alter_operation {
AlterTableOperation::ChangeColumnType {
column_name,
target_type,
} => {
assert_eq!("a", column_name.value);
assert_eq!(DataType::Text, *target_type);
}
_ => unreachable!(),
}
}
_ => unreachable!(),
}
let sql_2 = "ALTER TABLE my_metric_1 MODIFY COLUMN a TIMESTAMP_US";
let mut result_2 = ParserContext::create_with_dialect(
sql_2,
&GreptimeDbDialect {},
ParseOptions::default(),
)
.unwrap();
match result_2.remove(0) {
Statement::Alter(alter_table) => {
assert_eq!("my_metric_1", alter_table.table_name().0[0].value);
let alter_operation = alter_table.alter_operation();
assert_matches!(
alter_operation,
AlterTableOperation::ChangeColumnType { .. }
);
match alter_operation {
AlterTableOperation::ChangeColumnType {
column_name,
target_type,
} => {
assert_eq!("a", column_name.value);
assert!(matches!(target_type, DataType::Timestamp(Some(6), _)));
}
_ => unreachable!(),
}
}
_ => unreachable!(),
}
}
#[test]
fn test_parse_alter_rename_table() {
let sql = "ALTER TABLE test_table table_t";

View File

@@ -56,7 +56,14 @@ impl<'a> ParserContext<'a> {
})?;
let req = if self.parser.parse_keyword(Keyword::TO) {
let (with, connection, location) = self.parse_copy_parameters()?;
let (with, connection, location, limit) = self.parse_copy_parameters()?;
if limit.is_some() {
return error::InvalidSqlSnafu {
msg: "limit is not supported",
}
.fail();
}
let argument = CopyDatabaseArgument {
database_name,
with: with.into(),
@@ -68,7 +75,14 @@ impl<'a> ParserContext<'a> {
self.parser
.expect_keyword(Keyword::FROM)
.context(error::SyntaxSnafu)?;
let (with, connection, location) = self.parse_copy_parameters()?;
let (with, connection, location, limit) = self.parse_copy_parameters()?;
if limit.is_some() {
return error::InvalidSqlSnafu {
msg: "limit is not supported",
}
.fail();
}
let argument = CopyDatabaseArgument {
database_name,
with: with.into(),
@@ -91,28 +105,30 @@ impl<'a> ParserContext<'a> {
let table_name = Self::canonicalize_object_name(raw_table_name);
if self.parser.parse_keyword(Keyword::TO) {
let (with, connection, location) = self.parse_copy_parameters()?;
let (with, connection, location, limit) = self.parse_copy_parameters()?;
Ok(CopyTable::To(CopyTableArgument {
table_name,
with: with.into(),
connection: connection.into(),
location,
limit,
}))
} else {
self.parser
.expect_keyword(Keyword::FROM)
.context(error::SyntaxSnafu)?;
let (with, connection, location) = self.parse_copy_parameters()?;
let (with, connection, location, limit) = self.parse_copy_parameters()?;
Ok(CopyTable::From(CopyTableArgument {
table_name,
with: with.into(),
connection: connection.into(),
location,
limit,
}))
}
}
fn parse_copy_parameters(&mut self) -> Result<(With, Connection, String)> {
fn parse_copy_parameters(&mut self) -> Result<(With, Connection, String, Option<u64>)> {
let location =
self.parser
.parse_literal_string()
@@ -142,7 +158,21 @@ impl<'a> ParserContext<'a> {
.map(parse_option_string)
.collect::<Result<Connection>>()?;
Ok((with, connection, location))
let limit = if self.parser.parse_keyword(Keyword::LIMIT) {
Some(
self.parser
.parse_literal_uint()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "the number of maximum rows",
actual: self.peek_token_as_string(),
})?,
)
} else {
None
};
Ok((with, connection, location, limit))
}
}
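Usage of the new optional LIMIT segment; a sketch assuming a hypothetical table `my_table` and the export paths used by the copy sqlness tests further down. COPY DATABASE still rejects the segment:

-- stop after two rows
COPY my_table FROM '/tmp/demo/export/parquet_files/' LIMIT 2;
-- rejected: limit is not supported at the database level
COPY DATABASE public FROM '/tmp/demo/export/parquet_range/' LIMIT 2;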

View File

@@ -104,17 +104,19 @@ impl<'a> ParserContext<'a> {
let (start, end, step, lookback) = match parser.peek_token().token {
Token::LParen => {
let _consume_lparen_token = parser.next_token();
let start = Self::parse_string_or_number_or_word(parser, Token::Comma)?;
let end = Self::parse_string_or_number_or_word(parser, Token::Comma)?;
let delimiter_token = Self::find_next_delimiter_token(parser);
let (step, lookback) = if Self::is_comma(&delimiter_token) {
let step = Self::parse_string_or_number_or_word(parser, Token::Comma)?;
let lookback = Self::parse_string_or_number_or_word(parser, Token::RParen).ok();
(step, lookback)
let start = Self::parse_string_or_number_or_word(parser, &[Token::Comma])?.0;
let end = Self::parse_string_or_number_or_word(parser, &[Token::Comma])?.0;
let (step, delimiter) =
Self::parse_string_or_number_or_word(parser, &[Token::Comma, Token::RParen])?;
let lookback = if delimiter == Token::Comma {
Self::parse_string_or_number_or_word(parser, &[Token::RParen])
.ok()
.map(|t| t.0)
} else {
let step = Self::parse_string_or_number_or_word(parser, Token::RParen)?;
(step, None)
None
};
(start, end, step, lookback)
}
_ => ("0".to_string(), "0".to_string(), "5m".to_string(), None),
@@ -123,22 +125,8 @@ impl<'a> ParserContext<'a> {
Ok(TqlParameters::new(start, end, step, lookback, query))
}
fn find_next_delimiter_token(parser: &mut Parser) -> Token {
let mut n: usize = 0;
while !(Self::is_comma(&parser.peek_nth_token(n).token)
|| Self::is_rparen(&parser.peek_nth_token(n).token))
{
n += 1;
}
parser.peek_nth_token(n).token
}
pub fn is_delimiter_token(token: &Token, delimiter_token: &Token) -> bool {
match token {
Token::Comma => Self::is_comma(delimiter_token),
Token::RParen => Self::is_rparen(delimiter_token),
_ => false,
}
pub fn comma_or_rparen(token: &Token) -> bool {
Self::is_comma(token) || Self::is_rparen(token)
}
#[inline]
@@ -155,15 +143,21 @@ impl<'a> ParserContext<'a> {
self.peek_token_as_string().eq_ignore_ascii_case(VERBOSE)
}
/// Try to parse and consume a string, number or word token.
/// Return `Ok` if it's parsed and one of the given delimiter tokens is consumed.
/// The string and matched delimiter will be returned as a tuple.
fn parse_string_or_number_or_word(
parser: &mut Parser,
delimiter_token: Token,
) -> std::result::Result<String, TQLError> {
delimiter_tokens: &[Token],
) -> std::result::Result<(String, Token), TQLError> {
let mut tokens = vec![];
while !Self::is_delimiter_token(&parser.peek_token().token, &delimiter_token) {
let token = parser.next_token();
tokens.push(token.token);
while !delimiter_tokens.contains(&parser.peek_token().token) {
let token = parser.next_token().token;
if matches!(token, Token::EOF) {
break;
}
tokens.push(token);
}
let result = match tokens.len() {
0 => Err(ParserError::ParserError(
@@ -186,8 +180,15 @@ impl<'a> ParserContext<'a> {
}
_ => Self::parse_tokens(tokens),
};
parser.expect_token(&delimiter_token).context(ParserSnafu)?;
result
for token in delimiter_tokens {
if parser.consume_token(token) {
return result.map(|v| (v, token.clone()));
}
}
Err(ParserError::ParserError(format!(
"Delimiters not match {delimiter_tokens:?}"
)))
.context(ParserSnafu)
}
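After the delimiter rework, the TQL parameter list takes either three arguments or four with a trailing lookback delta, and malformed input fails instead of hanging; a sketch assuming the `metrics` table registered in the planner test earlier in this diff:

-- start, end, step
TQL EVAL (0, 100, '15s') metrics{tag="1"};
-- start, end, step, lookback
TQL EVAL (0, 100, '15s', '1m') metrics{tag="1"};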
fn parse_tokens(tokens: Vec<Token>) -> std::result::Result<String, TQLError> {
@@ -733,5 +734,11 @@ mod tests {
let result =
ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
assert!(result.output_msg().contains("empty TQL query"));
// invalid token
let sql = "tql eval (0, 0, '1s) t;;';";
let result =
ParserContext::create_with_dialect(sql, dialect, parse_options.clone()).unwrap_err();
assert!(result.output_msg().contains("Delimiters not match"));
}
}

View File

@@ -39,6 +39,10 @@ impl AlterTable {
pub fn alter_operation(&self) -> &AlterTableOperation {
&self.alter_operation
}
pub fn alter_operation_mut(&mut self) -> &mut AlterTableOperation {
&mut self.alter_operation
}
}
impl Display for AlterTable {

View File

@@ -111,6 +111,7 @@ pub struct CopyTableArgument {
pub connection: OptionMap,
/// Copy tbl [To|From] 'location'.
pub location: String,
pub limit: Option<u64>,
}
#[cfg(test)]

View File

@@ -20,6 +20,7 @@ use sqlparser::ast::{
};
use crate::error::Result;
use crate::statements::alter::AlterTableOperation;
use crate::statements::create::{CreateExternalTable, CreateTable};
use crate::statements::statement::Statement;
use crate::statements::transform::TransformRule;
@@ -51,6 +52,13 @@ impl TransformRule for TypeAliasTransformRule {
.iter_mut()
.for_each(|ColumnDef { data_type, .. }| replace_type_alias(data_type));
}
Statement::Alter(alter_table) => {
if let AlterTableOperation::ChangeColumnType { target_type, .. } =
alter_table.alter_operation_mut()
{
replace_type_alias(target_type)
}
}
_ => {}
}

View File

@@ -39,6 +39,8 @@ pub const DATA_REGION_SUBDIR: &str = "data";
pub const METRIC_ENGINE_NAME: &str = "metric";
pub const FILE_ENGINE_NAME: &str = "file";
/// Metadata key present in the `CREATE TABLE ... WITH ()` clause. This key is
/// used to identify that the table is a physical metric table. E.g.:
/// ```sql
@@ -70,3 +72,13 @@ pub const LOGICAL_TABLE_METADATA_KEY: &str = "on_physical_table";
/// HashMap key to be used in the region server's extension response.
/// Represents a list of column metadata entries that are added to the physical table.
pub const ALTER_PHYSICAL_EXTENSION_KEY: &str = "ALTER_PHYSICAL";
/// Returns true if it's an internal column of the metric engine.
pub fn is_metric_engine_internal_column(name: &str) -> bool {
name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME || name == DATA_SCHEMA_TSID_COLUMN_NAME
}
/// Returns true if it's the metric engine.
pub fn is_metric_engine(name: &str) -> bool {
name == METRIC_ENGINE_NAME
}
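These helpers let SHOW CREATE TABLE skip the metric engine's internal `__table_id` and `__tsid` columns; the observable effect, sketched with the same physical table the sqlness test at the end of this diff creates:

CREATE TABLE phy (ts timestamp time index, val double) engine = metric with ("physical_metric_table" = "");
-- internal columns are filtered from both the column list and the PRIMARY KEY constraint
SHOW CREATE TABLE phy;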

View File

@@ -228,6 +228,7 @@ pub struct CopyTableRequest {
pub pattern: Option<String>,
pub direction: CopyDirection,
pub timestamp_range: Option<TimestampRange>,
pub limit: Option<u64>,
}
#[derive(Debug, Clone, Default)]

View File

@@ -39,6 +39,7 @@ common-wal.workspace = true
datanode = { workspace = true }
datatypes.workspace = true
dotenv.workspace = true
flow.workspace = true
frontend = { workspace = true, features = ["testing"] }
futures.workspace = true
futures-util.workspace = true

View File

@@ -35,6 +35,7 @@ use common_procedure::options::ProcedureConfig;
use common_procedure::ProcedureManagerRef;
use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
use datanode::datanode::DatanodeBuilder;
use flow::FlownodeBuilder;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
@@ -128,6 +129,7 @@ impl GreptimeDbStandaloneBuilder {
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
table_metadata_manager.init().await.unwrap();
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let layered_cache_builder = LayeredCacheRegistryBuilder::default();
@@ -149,7 +151,19 @@ impl GreptimeDbStandaloneBuilder {
)
.await;
let node_manager = Arc::new(StandaloneDatanodeManager(datanode.region_server()));
let flow_builder = FlownodeBuilder::new(
1, // for standalone mode this value defaults to one
Default::default(),
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
);
let flownode = Arc::new(flow_builder.build().await);
let node_manager = Arc::new(StandaloneDatanodeManager {
region_server: datanode.region_server(),
flow_server: flownode.clone(),
});
let table_id_sequence = Arc::new(
SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())
@@ -204,6 +218,11 @@ impl GreptimeDbStandaloneBuilder {
.await
.unwrap();
flownode
.set_frontend_invoker(Box::new(instance.clone()))
.await;
let _node_handle = flownode.run_background();
procedure_manager.start().await.unwrap();
wal_options_allocator.start().await.unwrap();

View File

@@ -36,8 +36,12 @@ macro_rules! sql_test {
#[$meta]
)*
async fn [< $test >]() {
common_telemetry::init_default_ut_logging();
let store_type = tests_integration::test_util::StorageType::$service;
if store_type.test_on() {
common_telemetry::info!("test {} starts, store_type: {:?}", stringify!($test), store_type);
let _ = $crate::sql::$test(store_type).await;
}
@@ -427,8 +431,10 @@ pub async fn test_postgres_bytea(store_type: StorageType) {
let (client, connection) = tokio_postgres::connect(&format!("postgres://{addr}/public"), NoTls)
.await
.unwrap();
let (tx, rx) = tokio::sync::oneshot::channel();
tokio::spawn(async move {
connection.await.unwrap();
tx.send(()).unwrap();
});
let _ = client
.simple_query("CREATE TABLE test(b BLOB, ts TIMESTAMP TIME INDEX)")
@@ -481,6 +487,9 @@ pub async fn test_postgres_bytea(store_type: StorageType) {
let val: Vec<u8> = row.get("b");
assert_eq!(val, [97, 98, 99, 107, 108, 109, 42, 169, 84]);
drop(client);
rx.await.unwrap();
let _ = fe_pg_server.shutdown().await;
guard.remove_all().await;
}
@@ -492,8 +501,10 @@ pub async fn test_postgres_datestyle(store_type: StorageType) {
.await
.unwrap();
let (tx, rx) = tokio::sync::oneshot::channel();
tokio::spawn(async move {
connection.await.unwrap();
tx.send(()).unwrap();
});
let validate_datestyle = |client: Client, datestyle: &str, is_valid: bool| {
@@ -703,6 +714,9 @@ pub async fn test_postgres_datestyle(store_type: StorageType) {
}
}
drop(client);
rx.await.unwrap();
let _ = fe_pg_server.shutdown().await;
guard.remove_all().await;
}
@@ -714,8 +728,10 @@ pub async fn test_postgres_timezone(store_type: StorageType) {
.await
.unwrap();
let (tx, rx) = tokio::sync::oneshot::channel();
tokio::spawn(async move {
connection.await.unwrap();
tx.send(()).unwrap();
});
let get_row = |mess: Vec<SimpleQueryMessage>| -> String {
@@ -758,6 +774,10 @@ pub async fn test_postgres_timezone(store_type: StorageType) {
.unwrap(),
);
assert_eq!(timezone, "UTC");
drop(client);
rx.await.unwrap();
let _ = fe_pg_server.shutdown().await;
guard.remove_all().await;
}
@@ -769,8 +789,10 @@ pub async fn test_postgres_parameter_inference(store_type: StorageType) {
.await
.unwrap();
let (tx, rx) = tokio::sync::oneshot::channel();
tokio::spawn(async move {
connection.await.unwrap();
tx.send(()).unwrap();
});
// Create demo table
@@ -796,6 +818,10 @@ pub async fn test_postgres_parameter_inference(store_type: StorageType) {
assert_eq!(1, rows.len());
// Shutdown the client.
drop(client);
rx.await.unwrap();
let _ = fe_pg_server.shutdown().await;
guard.remove_all().await;
}

View File

@@ -52,6 +52,14 @@ SELECT * FROM demo ORDER BY ts;
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
+-------+------+--------+---------------------+
DELETE FROM demo;
Affected Rows: 1
COPY DATABASE public FROM '/tmp/demo/export/parquet_range/' LIMIT 2;
Error: 2000(InvalidSyntax), Invalid SQL, error: limit is not supported
DROP TABLE demo;
Affected Rows: 0

View File

@@ -20,4 +20,8 @@ COPY DATABASE public FROM '/tmp/demo/export/parquet_range/';
SELECT * FROM demo ORDER BY ts;
DELETE FROM demo;
COPY DATABASE public FROM '/tmp/demo/export/parquet_range/' LIMIT 2;
DROP TABLE demo;

View File

@@ -93,15 +93,15 @@ select count(*) from without_limit_rows;
| 4 |
+----------+
CREATE TABLE with_limit_rows(host string, cpu double, memory double, ts timestamp time index);
CREATE TABLE with_limit_rows_segment(host string, cpu double, memory double, ts timestamp time index);
Affected Rows: 0
Copy with_limit_rows FROM '/tmp/demo/export/parquet_files/' WITH (MAX_INSERT_ROWS = 2);
Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT 2;
Affected Rows: 2
select count(*) from with_limit_rows;
select count(*) from with_limit_rows_segment;
+----------+
| COUNT(*) |
@@ -109,6 +109,10 @@ select count(*) from with_limit_rows;
| 2 |
+----------+
Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT hello;
Error: 2000(InvalidSyntax), Unexpected token while parsing SQL statement: Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT hello;, expected: 'the number of maximum rows', found: ;: sql parser error: Expected literal int, found: hello at Line: 1, Column 75
drop table demo;
Affected Rows: 0
@@ -133,7 +137,7 @@ drop table without_limit_rows;
Affected Rows: 0
drop table with_limit_rows;
drop table with_limit_rows_segment;
Affected Rows: 0

View File

@@ -34,11 +34,13 @@ Copy without_limit_rows FROM '/tmp/demo/export/parquet_files/';
select count(*) from without_limit_rows;
CREATE TABLE with_limit_rows(host string, cpu double, memory double, ts timestamp time index);
CREATE TABLE with_limit_rows_segment(host string, cpu double, memory double, ts timestamp time index);
Copy with_limit_rows FROM '/tmp/demo/export/parquet_files/' WITH (MAX_INSERT_ROWS = 2);
Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT 2;
select count(*) from with_limit_rows;
select count(*) from with_limit_rows_segment;
Copy with_limit_rows_segment FROM '/tmp/demo/export/parquet_files/' LIMIT hello;
drop table demo;
@@ -52,4 +54,4 @@ drop table with_pattern;
drop table without_limit_rows;
drop table with_limit_rows;
drop table with_limit_rows_segment;

View File

@@ -0,0 +1,121 @@
CREATE TABLE host_sec (
ts timestamp(0) time index,
host STRING PRIMARY KEY,
val DOUBLE,
);
Affected Rows: 0
INSERT INTO TABLE host_sec VALUES
(0, 'host1', 1),
(0, 'host2', 2),
(5, 'host1', 3),
(5, 'host2', 4),
(10, 'host1', 5),
(10, 'host2', 6),
(15, 'host1', 7),
(15, 'host2', 8);
Affected Rows: 8
CREATE TABLE host_micro (
ts timestamp(6) time index,
host STRING PRIMARY KEY,
val DOUBLE,
);
Affected Rows: 0
INSERT INTO TABLE host_micro VALUES
(0, 'host1', 1),
(0, 'host2', 2),
(5000000, 'host1', 3),
(5000000, 'host2', 4),
(10000000, 'host1', 5),
(10000000, 'host2', 6),
(15000000, 'host1', 7),
(15000000, 'host2', 8);
Affected Rows: 8
-- Test on Timestamps of different precisions
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"};
+-----+-------+---------------------+
| val | host | ts |
+-----+-------+---------------------+
| 1.0 | host1 | 1970-01-01T00:00:00 |
| 3.0 | host1 | 1970-01-01T00:00:05 |
| 5.0 | host1 | 1970-01-01T00:00:10 |
| 7.0 | host1 | 1970-01-01T00:00:15 |
+-----+-------+---------------------+
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]);
+---------------------+----------------------------------+-------+
| ts | prom_avg_over_time(ts_range,val) | host |
+---------------------+----------------------------------+-------+
| 1970-01-01T00:00:00 | 1.0 | host1 |
| 1970-01-01T00:00:05 | 2.0 | host1 |
| 1970-01-01T00:00:10 | 4.0 | host1 |
| 1970-01-01T00:00:15 | 6.0 | host1 |
+---------------------+----------------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_micro{host="host1"};
+-----+-------+---------------------+
| val | host | ts |
+-----+-------+---------------------+
| 1.0 | host1 | 1970-01-01T00:00:00 |
| 3.0 | host1 | 1970-01-01T00:00:05 |
| 5.0 | host1 | 1970-01-01T00:00:10 |
| 7.0 | host1 | 1970-01-01T00:00:15 |
+-----+-------+---------------------+
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_micro{host="host1"}[5s]);
+---------------------+----------------------------------+-------+
| ts | prom_avg_over_time(ts_range,val) | host |
+---------------------+----------------------------------+-------+
| 1970-01-01T00:00:00 | 1.0 | host1 |
| 1970-01-01T00:00:05 | 2.0 | host1 |
| 1970-01-01T00:00:10 | 4.0 | host1 |
| 1970-01-01T00:00:15 | 6.0 | host1 |
+---------------------+----------------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"} + host_micro{host="host1"};
+-------+---------------------+-------------------------------+
| host | ts | host_sec.val + host_micro.val |
+-------+---------------------+-------------------------------+
| host1 | 1970-01-01T00:00:00 | 2.0 |
| host1 | 1970-01-01T00:00:05 | 6.0 |
| host1 | 1970-01-01T00:00:10 | 10.0 |
| host1 | 1970-01-01T00:00:15 | 14.0 |
+-------+---------------------+-------------------------------+
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]) + avg_over_time(host_micro{host="host1"}[5s]);
+-------+---------------------+-----------------------------------------------------------------------------------------+
| host | ts | host_sec.prom_avg_over_time(ts_range,val) + host_micro.prom_avg_over_time(ts_range,val) |
+-------+---------------------+-----------------------------------------------------------------------------------------+
| host1 | 1970-01-01T00:00:00 | 2.0 |
| host1 | 1970-01-01T00:00:05 | 4.0 |
| host1 | 1970-01-01T00:00:10 | 8.0 |
| host1 | 1970-01-01T00:00:15 | 12.0 |
+-------+---------------------+-----------------------------------------------------------------------------------------+
DROP TABLE host_sec;
Affected Rows: 0
DROP TABLE host_micro;
Affected Rows: 0

View File

@@ -0,0 +1,55 @@
CREATE TABLE host_sec (
ts timestamp(0) time index,
host STRING PRIMARY KEY,
val DOUBLE,
);
INSERT INTO TABLE host_sec VALUES
(0, 'host1', 1),
(0, 'host2', 2),
(5, 'host1', 3),
(5, 'host2', 4),
(10, 'host1', 5),
(10, 'host2', 6),
(15, 'host1', 7),
(15, 'host2', 8);
CREATE TABLE host_micro (
ts timestamp(6) time index,
host STRING PRIMARY KEY,
val DOUBLE,
);
INSERT INTO TABLE host_micro VALUES
(0, 'host1', 1),
(0, 'host2', 2),
(5000000, 'host1', 3),
(5000000, 'host2', 4),
(10000000, 'host1', 5),
(10000000, 'host2', 6),
(15000000, 'host1', 7),
(15000000, 'host2', 8);
-- Test on Timestamps of different precisions
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"};
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]);
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_micro{host="host1"};
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_micro{host="host1"}[5s]);
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') host_sec{host="host1"} + host_micro{host="host1"};
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') avg_over_time(host_sec{host="host1"}[5s]) + avg_over_time(host_micro{host="host1"}[5s]);
DROP TABLE host_sec;
DROP TABLE host_micro;

View File

@@ -98,11 +98,11 @@ Error: 3000(PlanQuery), DataFusion error: Error during planning: duration must b
SELECT min(val) RANGE '5s' FROM host ALIGN (INTERVAL '0' day);
Error: 2000(InvalidSyntax), Range Query: Can't use 0 as align in Range Query
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("0")` in range select query
SELECT min(val) RANGE (INTERVAL '0' day) FROM host ALIGN '5s';
Error: 2000(InvalidSyntax), Range Query: Invalid Range expr `MIN(host.val) RANGE IntervalMonthDayNano("0")`, Can't use 0 as range in Range Query
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("0")` in range select query
DROP TABLE host;

View File

@@ -82,6 +82,30 @@ SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day)
| 2024-01-24T23:00:00 | 3 |
+---------------------+------------------------------------------------------------------+
SELECT ts, min(val) RANGE (INTERVAL '2' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '2' day - INTERVAL '1' day) TO (now() - (now() + INTERVAL '1' hour)) by (1) ORDER BY ts;
+---------------------+-----------------------------------------------------------------------------------------------------------------+
| ts | MIN(host.val) RANGE IntervalMonthDayNano("36893488147419103232") - IntervalMonthDayNano("18446744073709551616") |
+---------------------+-----------------------------------------------------------------------------------------------------------------+
| 2024-01-22T23:00:00 | 0 |
| 2024-01-23T23:00:00 | 1 |
| 2024-01-24T23:00:00 | 3 |
+---------------------+-----------------------------------------------------------------------------------------------------------------+
-- non-positive duration
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '2' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("18446744073709551616") - IntervalMonthDayNano("36893488147419103232")` in range select query
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("18446744073709551616") - IntervalMonthDayNano("18446744073709551616")` in range select query
-- duration not all interval
SELECT ts, min(val) RANGE (now() - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `now() - IntervalMonthDayNano("18446744073709551616")` in range select query
--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';

View File

@@ -26,6 +26,18 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:0
SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
SELECT ts, min(val) RANGE (INTERVAL '2' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '2' day - INTERVAL '1' day) TO (now() - (now() + INTERVAL '1' hour)) by (1) ORDER BY ts;
-- non-positive duration
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '2' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
-- duration not all interval
SELECT ts, min(val) RANGE (now() - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';

View File

@@ -95,3 +95,100 @@ WITH(
Error: 1004(InvalidArguments), Object store not found: S3
CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "");
Affected Rows: 0
CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy");
Affected Rows: 0
show create table phy;
+-------+------------------------------------+
| Table | Create Table |
+-------+------------------------------------+
| phy | CREATE TABLE IF NOT EXISTS "phy" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | "host" STRING NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host") |
| | ) |
| | |
| | ENGINE=metric |
| | WITH( |
| | physical_metric_table = '' |
| | ) |
+-------+------------------------------------+
show create table t1;
+-------+-----------------------------------+
| Table | Create Table |
+-------+-----------------------------------+
| t1 | CREATE TABLE IF NOT EXISTS "t1" ( |
| | "host" STRING NULL, |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host") |
| | ) |
| | |
| | ENGINE=metric |
| | WITH( |
| | on_physical_table = 'phy' |
| | ) |
+-------+-----------------------------------+
drop table t1;
Affected Rows: 0
drop table phy;
Affected Rows: 0
CREATE TABLE IF NOT EXISTS "phy" (
"ts" TIMESTAMP(3) NOT NULL,
"val" DOUBLE NULL,
"__table_id" INT UNSIGNED NOT NULL,
"__tsid" BIGINT UNSIGNED NOT NULL,
"host" STRING NULL,
"job" STRING NULL,
TIME INDEX ("ts"),
PRIMARY KEY ("__table_id", "__tsid", "host", "job")
)
ENGINE=mito
WITH(
physical_metric_table = '',
);
Affected Rows: 0
show create table phy;
+-------+-------------------------------------------------------+
| Table | Create Table |
+-------+-------------------------------------------------------+
| phy | CREATE TABLE IF NOT EXISTS "phy" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | "__table_id" INT UNSIGNED NOT NULL, |
| | "__tsid" BIGINT UNSIGNED NOT NULL, |
| | "host" STRING NULL, |
| | "job" STRING NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("__table_id", "__tsid", "host", "job") |
| | ) |
| | |
| | ENGINE=mito |
| | WITH( |
| | physical_metric_table = '' |
| | ) |
+-------+-------------------------------------------------------+
drop table phy;
Affected Rows: 0

View File

@@ -48,3 +48,34 @@ ENGINE=mito
WITH(
storage = 'S3'
);
CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "");
CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy");
show create table phy;
show create table t1;
drop table t1;
drop table phy;
CREATE TABLE IF NOT EXISTS "phy" (
"ts" TIMESTAMP(3) NOT NULL,
"val" DOUBLE NULL,
"__table_id" INT UNSIGNED NOT NULL,
"__tsid" BIGINT UNSIGNED NOT NULL,
"host" STRING NULL,
"job" STRING NULL,
TIME INDEX ("ts"),
PRIMARY KEY ("__table_id", "__tsid", "host", "job")
)
ENGINE=mito
WITH(
physical_metric_table = '',
);
show create table phy;
drop table phy;