diff --git a/.github/actions/setup-kafka-cluster/action.yml b/.github/actions/setup-kafka-cluster/action.yml index b8a7339423..22b4389957 100644 --- a/.github/actions/setup-kafka-cluster/action.yml +++ b/.github/actions/setup-kafka-cluster/action.yml @@ -18,6 +18,8 @@ runs: --set controller.replicaCount=${{ inputs.controller-replicas }} \ --set controller.resources.requests.cpu=50m \ --set controller.resources.requests.memory=128Mi \ + --set controller.resources.limits.cpu=2000m \ + --set controller.resources.limits.memory=2Gi \ --set listeners.controller.protocol=PLAINTEXT \ --set listeners.client.protocol=PLAINTEXT \ --create-namespace \ diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 6eccbe65b8..8939453f9d 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -323,8 +323,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: @@ -474,8 +472,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 09fcc5c26e..afe01f11ec 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -12,7 +12,7 @@ on: linux_amd64_runner: type: choice description: The runner uses to build linux-amd64 artifacts - default: ec2-c6i.2xlarge-amd64 + default: ec2-c6i.4xlarge-amd64 options: - ubuntu-20.04 - ubuntu-20.04-8-cores @@ -27,7 +27,7 @@ on: linux_arm64_runner: type: choice description: The runner uses to build linux-arm64 artifacts - default: ec2-c6g.2xlarge-arm64 + default: ec2-c6g.4xlarge-arm64 options: - ec2-c6g.xlarge-arm64 # 4C8G - ec2-c6g.2xlarge-arm64 # 8C16G diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index b6ff247ffb..285fb61a7c 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -114,6 +114,17 @@ jobs: GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }} UNITTEST_LOG_DIR: "__unittest_logs" + cleanbuild-linux-nix: + runs-on: ubuntu-latest-8-cores + timeout-minutes: 60 + needs: [coverage, fmt, clippy, check] + steps: + - uses: actions/checkout@v4 + - uses: cachix/install-nix-action@v27 + with: + nix_path: nixpkgs=channel:nixos-unstable + - run: nix-shell --pure --run "cargo build" + check-status: name: Check status needs: [sqlness-test, sqlness-windows, test-on-windows] diff --git a/.gitignore b/.gitignore index c1b0a89618..5823287889 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,10 @@ benchmarks/data venv/ -# Fuzz tests +# Fuzz tests tests-fuzz/artifacts/ tests-fuzz/corpus/ + +# Nix +.direnv +.envrc diff --git a/Cargo.lock b/Cargo.lock index 628c6a5824..fa8ba34d1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,12 +222,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "approx_eq" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3f9eb837c6a783fbf002e3e5cc7925a3aa6893d6d42f9169517528983777590" - [[package]] name = "aquamarine" version = "0.3.3" @@ -872,18 +866,6 @@ dependencies = [ "rand", ] -[[package]] -name = "backon" -version = "0.4.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" -dependencies = [ - "fastrand", - "futures-core", - "pin-project", - "tokio", -] - [[package]] name = "backon" version = "1.2.0" @@ -1310,7 +1292,6 @@ dependencies = [ "common-meta", "moka", "snafu 0.8.5", - "substrait 0.12.0", ] [[package]] @@ -1349,7 +1330,6 @@ dependencies = [ "catalog", "chrono", "common-catalog", - "common-config", "common-error", "common-macro", "common-meta", @@ -1358,7 +1338,6 @@ dependencies = [ "common-recordbatch", "common-runtime", "common-telemetry", - "common-test-util", "common-time", "common-version", "dashmap", @@ -1369,7 +1348,6 @@ dependencies = [ "humantime", "itertools 0.10.5", "lazy_static", - "log-store", "meta-client", "moka", "object-store", @@ -1693,7 +1671,6 @@ dependencies = [ "common-grpc", "common-macro", "common-meta", - "common-options", "common-procedure", "common-query", "common-recordbatch", @@ -1722,7 +1699,6 @@ dependencies = [ "store-api", "substrait 0.12.0", "table", - "temp-env", "tempfile", "tokio", "tracing-appender", @@ -1746,8 +1722,6 @@ dependencies = [ "common-query", "common-recordbatch", "common-telemetry", - "datanode", - "derive-new 0.5.9", "enum_dispatch", "futures-util", "lazy_static", @@ -1918,6 +1892,7 @@ dependencies = [ "futures", "paste", "pin-project", + "rand", "serde", "snafu 0.8.5", "tokio", @@ -1928,13 +1903,6 @@ dependencies = [ [[package]] name = "common-catalog" version = "0.12.0" -dependencies = [ - "chrono", - "common-error", - "common-macro", - "snafu 0.8.5", - "tokio", -] [[package]] name = "common-config" @@ -1978,7 +1946,6 @@ dependencies = [ "datafusion", "datatypes", "derive_builder 0.12.0", - "dotenv", "futures", "lazy_static", "object-store", @@ -2022,15 +1989,10 @@ dependencies = [ name = "common-frontend" version = "0.12.0" dependencies = [ - "api", "async-trait", - "common-base", "common-error", "common-macro", - "common-query", - "session", "snafu 0.8.5", - "sql", ] [[package]] @@ -2064,7 +2026,6 @@ dependencies = [ "num-traits", "once_cell", "paste", - "ron", "s2", "serde", "serde_json", @@ -2255,7 +2216,7 @@ version = "0.12.0" dependencies = [ "async-stream", "async-trait", - "backon 1.2.0", + "backon", "common-base", "common-error", "common-macro", @@ -2353,8 +2314,6 @@ dependencies = [ "snafu 0.8.5", "tempfile", "tokio", - "tokio-metrics", - "tokio-metrics-collector", "tokio-test", "tokio-util", ] @@ -2834,16 +2793,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ctor" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" -dependencies = [ - "quote", - "syn 1.0.109", -] - [[package]] name = "darling" version = "0.14.4" @@ -3386,17 +3335,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "derive-new" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "derive-new" version = "0.7.0" @@ -3885,6 +3823,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" +[[package]] +name = "fastbloom" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b679f25009b51b71506296f95fb6362ba7d0151172fa7373a8d1611b8bc5d10f" +dependencies = [ + "getrandom", + "rand", + "siphasher 1.0.1", + "wide", +] + [[package]] name = "fastdivide" version = "0.4.1" @@ -3919,7 +3869,6 @@ dependencies = [ "common-error", "common-macro", "common-procedure", - "common-procedure-test", "common-query", "common-recordbatch", "common-telemetry", @@ -4067,7 +4016,6 @@ dependencies = [ "itertools 0.10.5", "lazy_static", "meta-client", - "minstant", "nom", "num-traits", "operator", @@ -4114,15 +4062,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "format_num" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14ac05eb8d2eb4ed1eeff847911deae077b0b53332465de9d6a26b0ea9961bc8" -dependencies = [ - "regex", -] - [[package]] name = "fragile" version = "2.0.0" @@ -4145,7 +4084,6 @@ dependencies = [ "common-config", "common-datasource", "common-error", - "common-frontend", "common-function", "common-grpc", "common-macro", @@ -4167,7 +4105,6 @@ dependencies = [ "lazy_static", "log-store", "meta-client", - "meta-srv", "opentelemetry-proto 0.5.0", "operator", "partition", @@ -5277,6 +5214,7 @@ dependencies = [ "common-runtime", "common-telemetry", "common-test-util", + "fastbloom", "fst", "futures", "greptime-proto", @@ -5287,6 +5225,7 @@ dependencies = [ "regex", "regex-automata 0.4.8", "serde", + "serde_json", "snafu 0.8.5", "tantivy", "tantivy-jieba", @@ -6076,6 +6015,18 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "local-ip-address" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3669cf5561f8d27e8fc84cc15e58350e70f557d4d65f70e3154e54cd2f8e1782" +dependencies = [ + "libc", + "neli", + "thiserror 1.0.64", + "windows-sys 0.59.0", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -6600,16 +6551,6 @@ dependencies = [ "adler2", ] -[[package]] -name = "minstant" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb9b5c752f145ac5046bccc3c4f62892e3c950c1d1eab80c5949cd68a2078db" -dependencies = [ - "ctor", - "web-time 1.1.0", -] - [[package]] name = "mio" version = "0.8.11" @@ -6643,6 +6584,7 @@ dependencies = [ "async-channel 1.9.0", "async-stream", "async-trait", + "bytemuck", "bytes", "common-base", "common-config", @@ -6652,7 +6594,6 @@ dependencies = [ "common-function", "common-macro", "common-meta", - "common-procedure-test", "common-query", "common-recordbatch", "common-runtime", @@ -7052,6 +6993,31 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" +[[package]] +name = "neli" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1100229e06604150b3becd61a4965d5c70f3be1759544ea7274166f4be41ef43" +dependencies = [ + "byteorder", + "libc", + "log", + "neli-proc-macros", +] + +[[package]] +name = "neli-proc-macros" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c168194d373b1e134786274020dae7fc5513d565ea2ebb9bc9ff17ffb69106d4" +dependencies = [ + "either", + "proc-macro2", + "quote", + "serde", + "syn 1.0.109", +] + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -7408,13 +7374,13 @@ checksum = 
"b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "opendal" -version = "0.49.2" +version = "0.50.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b04d09b9822c2f75a1d2fc513a2c1279c70e91e7407936fffdf6a6976ec530a" +checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" dependencies = [ "anyhow", "async-trait", - "backon 0.4.4", + "backon", "base64 0.22.1", "bytes", "chrono", @@ -7427,6 +7393,7 @@ dependencies = [ "md-5", "once_cell", "percent-encoding", + "prometheus", "quick-xml 0.36.2", "reqsign", "reqwest", @@ -8089,7 +8056,7 @@ dependencies = [ "async-trait", "bytes", "chrono", - "derive-new 0.7.0", + "derive-new", "futures", "hex", "lazy-regex", @@ -8139,7 +8106,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" dependencies = [ - "siphasher", + "siphasher 0.3.11", ] [[package]] @@ -8148,7 +8115,7 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ - "siphasher", + "siphasher 0.3.11", ] [[package]] @@ -8229,7 +8196,6 @@ dependencies = [ "query", "rayon", "regex", - "ron", "serde", "serde_json", "session", @@ -8641,10 +8607,7 @@ dependencies = [ "greptime-proto", "lazy_static", "prometheus", - "promql-parser", "prost 0.12.6", - "query", - "session", "snafu 0.8.5", "tokio", ] @@ -8883,6 +8846,7 @@ dependencies = [ "lz4_flex 0.11.3", "moka", "pin-project", + "prometheus", "serde", "serde_json", "sha2", @@ -8991,7 +8955,6 @@ version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", - "approx_eq", "arc-swap", "arrow", "arrow-schema", @@ -9023,7 +8986,6 @@ dependencies = [ "datafusion-sql", "datatypes", "fastrand", - "format_num", "futures", "futures-util", "greptime-proto", @@ -9051,9 +9013,7 @@ dependencies = [ "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "statrs", - "stats-cli", "store-api", - "streaming-stats", "substrait 0.12.0", "table", "tokio", @@ -9416,9 +9376,9 @@ dependencies = [ [[package]] name = "reqsign" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa" +checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149" dependencies = [ "anyhow", "async-trait", @@ -9447,9 +9407,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -10086,7 +10046,7 @@ dependencies = [ "once_cell", "radium", "rand", - "siphasher", + "siphasher 0.3.11", "unic-ucd-category", "volatile", "widestring", @@ -10544,7 +10504,6 @@ dependencies = [ "datatypes", "futures", "lazy_static", - "log-store", "once_cell", "operator", "paste", @@ -10567,7 +10526,6 @@ dependencies = [ "sql", "table", "tokio", - "tokio-test", ] [[package]] @@ -10909,7 +10867,6 @@ dependencies = [ "tokio-postgres-rustls", "tokio-rustls 0.26.0", "tokio-stream", - "tokio-test", "tokio-util", "tonic 0.11.0", "tonic-reflection", @@ -11100,6 +11057,12 @@ version = "0.3.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "sketches-ddsketch" version = "0.2.2" @@ -11295,6 +11258,7 @@ dependencies = [ "jsonb", "lazy_static", "regex", + "serde", "serde_json", "snafu 0.8.5", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", @@ -11343,14 +11307,21 @@ dependencies = [ "common-recordbatch", "common-time", "datatypes", + "flate2", + "hex", + "local-ip-address", "mysql", + "reqwest", "serde", "serde_json", + "sha2", "sqlness", + "tar", "tempfile", "tinytemplate", "tokio", "tokio-postgres", + "tokio-stream", ] [[package]] @@ -11371,6 +11342,7 @@ dependencies = [ "lazy_static", "log", "regex", + "serde", "sqlparser 0.45.0 (registry+https://github.com/rust-lang/crates.io-index)", "sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", ] @@ -11541,16 +11513,6 @@ dependencies = [ "rand", ] -[[package]] -name = "stats-cli" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8786c4fc8a91bc4fcd90aed33413f79e4dc9811f24ba14d1d59adf57cf1c871" -dependencies = [ - "clap 2.34.0", - "num-traits", -] - [[package]] name = "store-api" version = "0.12.0" @@ -11592,15 +11554,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" -[[package]] -name = "streaming-stats" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d670ce4e348a2081843569e0f79b21c99c91bb9028b3b3ecb0f050306de547" -dependencies = [ - "num-traits", -] - [[package]] name = "strfmt" version = "0.2.4" @@ -12124,6 +12077,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-lexicon" version = "0.12.16" @@ -12194,6 +12158,7 @@ dependencies = [ "arbitrary", "async-trait", "chrono", + "common-base", "common-error", "common-macro", "common-query", @@ -12557,30 +12522,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "tokio-metrics" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eace09241d62c98b7eeb1107d4c5c64ca3bd7da92e8c218c153ab3a78f9be112" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - -[[package]] -name = "tokio-metrics-collector" -version = "0.2.1" -source = "git+https://github.com/MichaelScofield/tokio-metrics-collector.git?rev=89d692d5753d28564a7aac73c6ac5aba22243ba0#89d692d5753d28564a7aac73c6ac5aba22243ba0" -dependencies = [ - "lazy_static", - "parking_lot 0.12.3", - "prometheus", - "tokio", - "tokio-metrics", -] - [[package]] name = "tokio-postgres" version = "0.7.12" @@ -13007,7 +12948,7 @@ dependencies = [ "tracing-core", "tracing-log 0.2.0", "tracing-subscriber", - "web-time 0.2.4", + "web-time", 
] [[package]] @@ -13778,16 +13719,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "webbrowser" version = "0.8.15" @@ -14282,6 +14213,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + [[package]] name = "xml-rs" version = "0.8.22" diff --git a/Cargo.toml b/Cargo.toml index d1d360850e..990bc71a90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -180,6 +180,7 @@ sysinfo = "0.30" # on branch v0.44.x sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [ "visitor", + "serde", ] } strum = { version = "0.25", features = ["derive"] } tempfile = "3" diff --git a/config/config.md b/config/config.md index 6a500a5b4a..d3353930b1 100644 --- a/config/config.md +++ b/config/config.md @@ -150,6 +150,7 @@ | `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. | | `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | | `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | +| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. | | `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. | | `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically (default)
- `disable`: never | @@ -475,6 +476,9 @@ | `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.
- `auto`: automatically determine the threshold based on the system memory size (default)
- `unlimited`: no memory limit
- `[size]` e.g. `64MB`: fixed memory threshold | | `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. | +| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | +| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | +| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. | | `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. | | `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically (default)
- `disable`: never | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 0ba80a9f7d..90a4d69b2e 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -543,6 +543,15 @@ mem_threshold_on_create = "auto" ## Deprecated, use `region_engine.mito.index.aux_path` instead. intermediate_path = "" +## Cache size for inverted index metadata. +metadata_cache_size = "64MiB" + +## Cache size for inverted index content. +content_cache_size = "128MiB" + +## Page size for inverted index content cache. +content_cache_page_size = "8MiB" + ## The options for full-text index in Mito engine. [region_engine.mito.fulltext_index] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 8eae532d61..b73246d37f 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -588,6 +588,9 @@ metadata_cache_size = "64MiB" ## Cache size for inverted index content. content_cache_size = "128MiB" +## Page size for inverted index content cache. +content_cache_page_size = "8MiB" + ## The options for full-text index in Mito engine. [region_engine.mito.fulltext_index] diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json index 7c6dfb0751..9657565c27 100644 --- a/grafana/greptimedb.json +++ b/grafana/greptimedb.json @@ -145,7 +145,9 @@ "countRows": false, "enablePagination": false, "fields": [], - "reducer": ["sum"], + "reducer": [ + "sum" + ], "show": false }, "showHeader": true, @@ -223,7 +225,9 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -298,7 +302,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -595,7 +601,7 @@ "type": "timeseries" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -603,776 +609,772 @@ "y": 10 }, "id": 24, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 34, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + 
"includeNullMetadata": false, + "instant": false, + "legendFormat": "promql-{{db}}-p95", + "range": true, + "refId": "PromQL P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "promql-{{db}}-p99", + "range": true, + "refId": "PromQL P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "sql-{{db}}-p95", + "range": true, + "refId": "SQL P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "sql-{{db}}-p99", + "range": true, + "refId": "SQL P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-read-{{db}}-p95", + "range": true, + "refId": "PromStore Read P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-read-{{db}}-p99", + "range": true, + "refId": "PromStore Read P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "prom-promql-{{db}}-{{method}}-p95", + "range": true, + "refId": "Prometheus PromQL P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "prom-promql-{{db}}-{{method}}-p99", + "range": true, + "refId": "Prometheus PromQL P99" + } + ], + "title": "HTTP query elapsed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 35, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "influx-{{db}}-p95", + "range": true, + "refId": "InfluxDB Line Protocol P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "influx-{{db}}-p99", + "range": true, + "refId": "InfluxDB Line Protocol P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-{{db}}-p95", + "range": true, + "refId": "PromStore Write P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-{{db}}-p99", + "range": true, + "refId": "PromStore Write P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-metric-{{db}}-p95", + "range": true, + "refId": "OTLP Metric P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-metric-{{db}}-p99", + "range": true, + "refId": "OTLP Metric P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-trace-{{db}}-p95", + "range": true, + "refId": "OTLP Trace P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) 
(rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-trace-{{db}}-p99", + "range": true, + "refId": "OTLP Trace P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-transform-{{db}}-p95", + "range": true, + "refId": "Log Transform P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-transform-{{db}}-p99", + "range": true, + "refId": "Log Transform P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-ingest-{{db}}-p99", + "range": true, + "refId": "Log Ingest P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-ingest-{{db}}-p99", + "range": true, + "refId": "Log Ingest P99" + } + ], + "title": "HTTP write elapsed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 38, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(path) (rate(greptime_servers_http_requests_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "HTTP request rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 36, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(db) (rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Logs ingest rate (number of lines)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 13, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + 
"includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}-p99", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "gRPC insert elapsed", + "type": "timeseries" + } + ], "title": "Protocol", "type": "row" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 11 - }, - "id": 34, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "promql-{{db}}-p95", - "range": true, - "refId": "PromQL P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "promql-{{db}}-p99", - "range": true, - "refId": "PromQL P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "sql-{{db}}-p95", - "range": true, - "refId": "SQL P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "sql-{{db}}-p99", - "range": true, - "refId": "SQL P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-read-{{db}}-p95", - "range": true, - "refId": "PromStore Read P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": 
"code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-read-{{db}}-p99", - "range": true, - "refId": "PromStore Read P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "prom-promql-{{db}}-{{method}}-p95", - "range": true, - "refId": "Prometheus PromQL P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "prom-promql-{{db}}-{{method}}-p99", - "range": true, - "refId": "Prometheus PromQL P99" - } - ], - "title": "HTTP query elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 11 - }, - "id": 35, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "influx-{{db}}-p95", - "range": true, - "refId": "InfluxDB Line Protocol P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "influx-{{db}}-p99", - "range": true, - "refId": "InfluxDB Line Protocol P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) 
(rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-{{db}}-p95", - "range": true, - "refId": "PromStore Write P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-{{db}}-p99", - "range": true, - "refId": "PromStore Write P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-metric-{{db}}-p95", - "range": true, - "refId": "OTLP Metric P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-metric-{{db}}-p99", - "range": true, - "refId": "OTLP Metric P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-trace-{{db}}-p95", - "range": true, - "refId": "OTLP Trace P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-trace-{{db}}-p99", - "range": true, - "refId": "OTLP Trace P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-transform-{{db}}-p95", - "range": true, - "refId": "Log Transform P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-transform-{{db}}-p99", - "range": true, - "refId": "Log Transform P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-ingest-{{db}}-p99", - "range": true, - "refId": "Log Ingest P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-ingest-{{db}}-p99", - "range": true, - "refId": "Log Ingest P99" - } - ], - "title": "HTTP 
write elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 38, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by(path) (rate(greptime_servers_http_requests_total[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "HTTP request rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 18 - }, - "id": 36, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by(db) (rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Logs ingest rate (number of lines)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": 
{ - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 13, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}-p95", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}-p99", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "gRPC insert elapsed", - "type": "timeseries" - }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 11 }, "id": 25, "panels": [], @@ -1426,7 +1428,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1441,7 +1444,7 @@ "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 12 }, "id": 1, "interval": "1s", @@ -1542,7 +1545,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1557,7 +1561,7 @@ "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 12 }, "id": 7, "interval": "1s", @@ -1641,7 +1645,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1656,7 +1661,7 @@ "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 19 }, "id": 3, "interval": "1s", @@ -1740,7 +1745,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1755,7 +1761,7 @@ "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 19 }, "id": 11, "interval": "1s", @@ -1856,7 +1862,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1871,7 +1878,7 @@ "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 26 }, "id": 15, "interval": "1s", @@ -1968,7 +1975,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1983,10 +1991,9 @@ "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 26 }, "id": 39, - 
"interval": "1s", "options": { "legend": { "calcs": [], @@ -2006,12 +2013,12 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[5m])", + "editorMode": "builder", + "expr": "greptime_mito_inflight_compaction_count", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "compaction-{{stage}}", + "legendFormat": "compaction-{{instance}}", "range": true, "refId": "A", "useBackend": false @@ -2021,13 +2028,17 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_mito_flush_elapsed_bucket[$__rate_interval])))", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "greptime_mito_inflight_flush_count", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": true, "instant": false, - "legendFormat": "flush-{{type}}", + "legendFormat": "flush-{{instance}}", "range": true, - "refId": "B" + "refId": "B", + "useBackend": false } ], "title": "Flush / compaction count", @@ -2080,7 +2091,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2096,7 +2108,7 @@ "h": 7, "w": 12, "x": 0, - "y": 54 + "y": 33 }, "id": 9, "interval": "1s", @@ -2193,7 +2205,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2208,7 +2221,7 @@ "h": 7, "w": 12, "x": 12, - "y": 54 + "y": 33 }, "id": 40, "interval": "1s", @@ -2231,8 +2244,8 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_mito_write_stall_total", + "editorMode": "builder", + "expr": "rate(greptime_mito_write_stall_total[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -2292,7 +2305,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2308,7 +2322,7 @@ "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 40 }, "id": 41, "interval": "1s", @@ -2392,7 +2406,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2408,7 +2423,7 @@ "h": 7, "w": 12, "x": 12, - "y": 61 + "y": 40 }, "id": 42, "interval": "1s", @@ -2446,939 +2461,1684 @@ "type": "timeseries" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 68 + "y": 47 }, "id": 26, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 
4 + }, + "id": 22, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "p95-{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99-{{operation}}", + "range": true, + "refId": "B" + } + ], + "title": "Metric engine to mito R/W duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 33, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "p95-{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99-{{label_name}}", + "range": true, + "refId": "B" + } + ], + "title": "Metric engine to mito DDL duration", + "type": "timeseries" + } + ], "title": "Metric Engine", "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 69 - }, - "id": 22, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "p95-{{operation}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "p99-{{operation}}", - "range": true, - "refId": "B" - } - ], - "title": "Metric engine to mito R/W duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 69 - }, - "id": 33, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "p95-{{operation}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": 
"code", - "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "p99-{{label_name}}", - "range": true, - "refId": "B" - } - ], - "title": "Metric engine to mito DDL duration", - "type": "timeseries" - }, - { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 76 + "y": 48 }, "id": 21, - "panels": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + 
"editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, 
+ "w": 12, + "x": 12, + "y": 28 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" + } + ], + "title": "WAL sync duration seconds", + "type": "timeseries" + } + ], "title": "Storage Components", "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - 
"gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "collapsed": true, "gridPos": { - "h": 7, - "w": 12, + "h": 1, + "w": 24, "x": 0, - "y": 77 + "y": 49 }, - "id": 18, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ + "id": 46, + "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "OpenDAL traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 45, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - } + "disableTextWrap": false, + 
"editorMode": "code", + "expr": "greptime_index_create_memory_usage", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "greptime_index_apply_memory_usage", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "Index memory usage", + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 77 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "OpenDAL operation duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 19, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P99", + "useBackend": false + } + ], + "title": "Index elapsed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 84 - }, - "id": 10, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) 
(rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", - "range": true, - "refId": "Log Store P99" - } - ], - "title": "Log Store op duration seconds", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 47, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 84 - }, - "id": 19, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P95", - "useBackend": false + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(greptime_index_create_rows_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Index create rows total", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P95", - 
"useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P99", - "useBackend": false - } - ], - "title": "Index elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 48, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - } + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index create bytes", + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 91 - }, - "id": 37, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, 
node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", - "range": true, - "refId": "Log Store P95" - } - ], - "title": "WAL sync duration seconds", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 49, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 91 - }, - "id": 12, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p95", - "range": true, - "refId": "A", - "useBackend": false + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index IO 
bytes", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p99", - "range": true, - "refId": "C", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", - "hide": false, - "instant": false, - "legendFormat": "throughput", - "range": true, - "refId": "B" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 50, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index IO op", + "type": "timeseries" } ], - "title": "WAL write size", - "type": "timeseries" + "title": "Index", + "type": "row" } ], "refresh": "10s", @@ -3395,6 +4155,6 @@ "timezone": "", "title": "GreptimeDB", "uid": "e7097237-669b-4f8d-b751-13067afbfb68", - "version": 16, + "version": 18, "weekStart": "" } diff --git a/rust-toolchain.toml b/rust-toolchain.toml index c986eedd97..d12222a5d3 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,3 @@ [toolchain] channel = "nightly-2024-10-19" +components = ["rust-analyzer"] diff --git a/scripts/check-snafu.py b/scripts/check-snafu.py index d44edfeb8c..b91950692b 100644 --- a/scripts/check-snafu.py +++ b/scripts/check-snafu.py @@ -58,8 +58,10 @@ def main(): if not check_snafu_in_files(branch_name, other_rust_files) ] - for name in unused_snafu: - print(name) + if unused_snafu: + print("Unused error variants:") + for name in unused_snafu: + print(name) if unused_snafu: raise SystemExit(1) diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000000..ce84a03276 --- /dev/null +++ b/shell.nix @@ -0,0 +1,27 @@ +let + nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable"; + 
fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {}; + pkgs = import nixpkgs { config = {}; overlays = []; }; +in + +pkgs.mkShell rec { + nativeBuildInputs = with pkgs; [ + pkg-config + git + clang + gcc + protobuf + mold + (fenix.fromToolchainFile { + dir = ./.; + }) + cargo-nextest + taplo + ]; + + buildInputs = with pkgs; [ + libgit2 + ]; + + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs; +} diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index f026d3f6f9..77dcd2c621 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY, - FULLTEXT_KEY, INVERTED_INDEX_KEY, + FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, }; use greptime_proto::v1::Analyzer; use snafu::ResultExt; @@ -29,6 +29,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType}; const FULLTEXT_GRPC_KEY: &str = "fulltext"; /// Key used to store inverted index options in gRPC column options. const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index"; +/// Key used to store skip index options in gRPC column options. +const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index"; /// Tries to construct a `ColumnSchema` from the given `ColumnDef`. pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { @@ -60,6 +62,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) { metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone()); } + if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) { + metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone()); + } } ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) @@ -84,6 +89,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option &mut Datanode { - &mut self.datanode - } - pub fn datanode(&self) -> &Datanode { &self.datanode } diff --git a/src/cmd/src/flownode.rs b/src/cmd/src/flownode.rs index a9ad12bfbc..b399bf37f7 100644 --- a/src/cmd/src/flownode.rs +++ b/src/cmd/src/flownode.rs @@ -63,10 +63,6 @@ impl Instance { } } - pub fn flownode_mut(&mut self) -> &mut FlownodeInstance { - &mut self.flownode - } - pub fn flownode(&self) -> &FlownodeInstance { &self.flownode } diff --git a/src/common/base/Cargo.toml b/src/common/base/Cargo.toml index 465599974d..2d35ad5d31 100644 --- a/src/common/base/Cargo.toml +++ b/src/common/base/Cargo.toml @@ -17,6 +17,7 @@ common-macro.workspace = true futures.workspace = true paste = "1.0" pin-project.workspace = true +rand.workspace = true serde = { version = "1.0", features = ["derive"] } snafu.workspace = true tokio.workspace = true diff --git a/src/common/base/src/range_read.rs b/src/common/base/src/range_read.rs index 91f865d17e..53c26eeebd 100644 --- a/src/common/base/src/range_read.rs +++ b/src/common/base/src/range_read.rs @@ -36,6 +36,11 @@ pub struct Metadata { /// `RangeReader` reads a range of bytes from a source. #[async_trait] pub trait RangeReader: Send + Unpin { + /// Sets the file size hint for the reader. + /// + /// It's used to optimize the reading process by reducing the number of remote requests. + fn with_file_size_hint(&mut self, file_size_hint: u64); + /// Returns the metadata of the source. 
async fn metadata(&mut self) -> io::Result; @@ -70,6 +75,10 @@ pub trait RangeReader: Send + Unpin { #[async_trait] impl RangeReader for &mut R { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + (*self).with_file_size_hint(file_size_hint) + } + async fn metadata(&mut self) -> io::Result { (*self).metadata().await } @@ -186,15 +195,17 @@ impl AsyncRead for AsyncReadAdapter { #[async_trait] impl RangeReader for Vec { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.len() as u64, }) } - async fn read(&mut self, mut range: Range) -> io::Result { - range.end = range.end.min(self.len() as u64); - + async fn read(&mut self, range: Range) -> io::Result { let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]); Ok(bytes) } @@ -222,6 +233,10 @@ impl FileReader { #[async_trait] impl RangeReader for FileReader { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.content_length, diff --git a/src/common/base/src/readable_size.rs b/src/common/base/src/readable_size.rs index 21908526c7..4298989291 100644 --- a/src/common/base/src/readable_size.rs +++ b/src/common/base/src/readable_size.rs @@ -19,7 +19,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE; pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE; pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE; -#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default)] pub struct ReadableSize(pub u64); impl ReadableSize { diff --git a/src/common/catalog/Cargo.toml b/src/common/catalog/Cargo.toml index 61f49ab0e4..051675fe93 100644 --- a/src/common/catalog/Cargo.toml +++ b/src/common/catalog/Cargo.toml @@ -8,10 +8,5 @@ license.workspace = true workspace = true [dependencies] -common-error.workspace = true -common-macro.workspace = true -snafu.workspace = true [dev-dependencies] -chrono.workspace = true -tokio.workspace = true diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 65f1d18a66..16137e6b3e 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -48,5 +48,4 @@ url = "2.3" [dev-dependencies] common-telemetry.workspace = true common-test-util.workspace = true -dotenv.workspace = true uuid.workspace = true diff --git a/src/common/datasource/src/object_store/fs.rs b/src/common/datasource/src/object_store/fs.rs index f87311f517..5ffbbfa314 100644 --- a/src/common/datasource/src/object_store/fs.rs +++ b/src/common/datasource/src/object_store/fs.rs @@ -27,7 +27,7 @@ pub fn build_fs_backend(root: &str) -> Result { DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::PrometheusMetricsLayer::new(true)) + .layer(object_store::layers::build_prometheus_metrics_layer(true)) .finish(); Ok(object_store) } diff --git a/src/common/datasource/src/object_store/s3.rs b/src/common/datasource/src/object_store/s3.rs index e141621b89..0d83eb7a98 100644 --- a/src/common/datasource/src/object_store/s3.rs +++ b/src/common/datasource/src/object_store/s3.rs @@ -89,7 +89,7 @@ pub fn build_s3_backend( DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::PrometheusMetricsLayer::new(true)) + .layer(object_store::layers::build_prometheus_metrics_layer(true)) .finish()) } diff --git 
a/src/common/frontend/Cargo.toml b/src/common/frontend/Cargo.toml index 2aa111fa1a..7c3b705bdd 100644 --- a/src/common/frontend/Cargo.toml +++ b/src/common/frontend/Cargo.toml @@ -5,12 +5,7 @@ edition.workspace = true license.workspace = true [dependencies] -api.workspace = true async-trait.workspace = true -common-base.workspace = true common-error.workspace = true common-macro.workspace = true -common-query.workspace = true -session.workspace = true snafu.workspace = true -sql.workspace = true diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index 29cefb1e75..e7cc25ca13 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -51,6 +51,5 @@ wkt = { version = "0.11", optional = true } [dev-dependencies] approx = "0.5" -ron = "0.7" serde = { version = "1.0", features = ["derive"] } tokio.workspace = true diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs index 7c8cf5550e..d462b917af 100644 --- a/src/common/function/src/scalars/vector.rs +++ b/src/common/function/src/scalars/vector.rs @@ -15,6 +15,8 @@ mod convert; mod distance; pub(crate) mod impl_conv; +mod scalar_add; +mod scalar_mul; use std::sync::Arc; @@ -32,5 +34,9 @@ impl VectorFunction { registry.register(Arc::new(distance::CosDistanceFunction)); registry.register(Arc::new(distance::DotProductFunction)); registry.register(Arc::new(distance::L2SqDistanceFunction)); + + // scalar calculation + registry.register(Arc::new(scalar_add::ScalarAddFunction)); + registry.register(Arc::new(scalar_mul::ScalarMulFunction)); } } diff --git a/src/common/function/src/scalars/vector/impl_conv.rs b/src/common/function/src/scalars/vector/impl_conv.rs index 903bfb2a03..70a142c290 100644 --- a/src/common/function/src/scalars/vector/impl_conv.rs +++ b/src/common/function/src/scalars/vector/impl_conv.rs @@ -109,7 +109,6 @@ pub fn parse_veclit_from_strlit(s: &str) -> Result> { }) } -#[allow(unused)] /// Convert a vector literal to a binary literal. pub fn veclit_to_binlit(vec: &[f32]) -> Vec { if cfg!(target_endian = "little") { diff --git a/src/common/function/src/scalars/vector/scalar_add.rs b/src/common/function/src/scalars/vector/scalar_add.rs new file mode 100644 index 0000000000..ef016eff4b --- /dev/null +++ b/src/common/function/src/scalars/vector/scalar_add.rs @@ -0,0 +1,173 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
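+// Implements the `vec_scalar_add` SQL function registered in scalars/vector.rs. The vector
+// argument is decoded with as_veclit / as_veclit_if_const and the element-wise addition is
+// performed through nalgebra's add_scalar.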
+ +use std::borrow::Cow; +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::Signature; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use nalgebra::DVectorView; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; +use crate::helper; +use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit}; + +const NAME: &str = "vec_scalar_add"; + +/// Adds a scalar to each element of a vector. +/// +/// # Example +/// +/// ```sql +/// SELECT vec_to_string(vec_scalar_add(1, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [2,3,4] | +/// +---------+ +/// +/// -- Negative scalar to simulate subtraction +/// SELECT vec_to_string(vec_scalar_add(-1, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [0,1,2] | +/// +---------+ +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScalarAddFunction; + +impl Function for ScalarAddFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ConcreteDataType::float64_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ], + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let arg0 = &columns[0]; + let arg1 = &columns[1]; + + let len = arg0.len(); + let mut result = BinaryVectorBuilder::with_capacity(len); + if len == 0 { + return Ok(result.to_vector()); + } + + let arg1_const = as_veclit_if_const(arg1)?; + + for i in 0..len { + let arg0 = arg0.get(i).as_f64_lossy(); + let Some(arg0) = arg0 else { + result.push_null(); + continue; + }; + + let arg1 = match arg1_const.as_ref() { + Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())), + None => as_veclit(arg1.get_ref(i))?, + }; + let Some(arg1) = arg1 else { + result.push_null(); + continue; + }; + + let vec = DVectorView::from_slice(&arg1, arg1.len()); + let vec_res = vec.add_scalar(arg0 as _); + + let veclit = vec_res.as_slice(); + let binlit = veclit_to_binlit(veclit); + result.push(Some(&binlit)); + } + + Ok(result.to_vector()) + } +} + +impl Display for ScalarAddFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datatypes::vectors::{Float32Vector, StringVector}; + + use super::*; + + #[test] + fn test_scalar_add() { + let func = ScalarAddFunction; + + let input0 = Arc::new(Float32Vector::from(vec![ + Some(1.0), + Some(-1.0), + None, + Some(3.0), + ])); + let input1 = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + Some("[7.0,8.0,9.0]".to_string()), + None, + ])); + + let result = func + .eval(FunctionContext::default(), &[input0, input1]) + .unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 4); + assert_eq!( + result.get_ref(0).as_binary().unwrap(), + Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice()) + ); + assert_eq!( + 
result.get_ref(1).as_binary().unwrap(), + Some(veclit_to_binlit(&[3.0, 4.0, 5.0]).as_slice()) + ); + assert!(result.get_ref(2).is_null()); + assert!(result.get_ref(3).is_null()); + } +} diff --git a/src/common/function/src/scalars/vector/scalar_mul.rs b/src/common/function/src/scalars/vector/scalar_mul.rs new file mode 100644 index 0000000000..3c7fe4c070 --- /dev/null +++ b/src/common/function/src/scalars/vector/scalar_mul.rs @@ -0,0 +1,173 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Cow; +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::Signature; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use nalgebra::DVectorView; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; +use crate::helper; +use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit}; + +const NAME: &str = "vec_scalar_mul"; + +/// Multiples a scalar to each element of a vector. +/// +/// # Example +/// +/// ```sql +/// SELECT vec_to_string(vec_scalar_mul(2, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [2,4,6] | +/// +---------+ +/// +/// -- 1/scalar to simulate division +/// SELECT vec_to_string(vec_scalar_mul(0.5, "[2, 4, 6]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [1,2,3] | +/// +---------+ +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScalarMulFunction; + +impl Function for ScalarMulFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ConcreteDataType::float64_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ], + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let arg0 = &columns[0]; + let arg1 = &columns[1]; + + let len = arg0.len(); + let mut result = BinaryVectorBuilder::with_capacity(len); + if len == 0 { + return Ok(result.to_vector()); + } + + let arg1_const = as_veclit_if_const(arg1)?; + + for i in 0..len { + let arg0 = arg0.get(i).as_f64_lossy(); + let Some(arg0) = arg0 else { + result.push_null(); + continue; + }; + + let arg1 = match arg1_const.as_ref() { + Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())), + None => as_veclit(arg1.get_ref(i))?, + }; + let Some(arg1) = arg1 else { + result.push_null(); + continue; + }; + + let vec = DVectorView::from_slice(&arg1, arg1.len()); + let vec_res = vec.scale(arg0 as _); + + let veclit = vec_res.as_slice(); + let binlit = 
veclit_to_binlit(veclit); + result.push(Some(&binlit)); + } + + Ok(result.to_vector()) + } +} + +impl Display for ScalarMulFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datatypes::vectors::{Float32Vector, StringVector}; + + use super::*; + + #[test] + fn test_scalar_mul() { + let func = ScalarMulFunction; + + let input0 = Arc::new(Float32Vector::from(vec![ + Some(2.0), + Some(-0.5), + None, + Some(3.0), + ])); + let input1 = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[8.0,10.0,12.0]".to_string()), + Some("[7.0,8.0,9.0]".to_string()), + None, + ])); + + let result = func + .eval(FunctionContext::default(), &[input0, input1]) + .unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 4); + assert_eq!( + result.get_ref(0).as_binary().unwrap(), + Some(veclit_to_binlit(&[2.0, 4.0, 6.0]).as_slice()) + ); + assert_eq!( + result.get_ref(1).as_binary().unwrap(), + Some(veclit_to_binlit(&[-4.0, -5.0, -6.0]).as_slice()) + ); + assert!(result.get_ref(2).is_null()); + assert!(result.get_ref(3).is_null()); + } +} diff --git a/src/common/meta/src/cache/table/table_route.rs b/src/common/meta/src/cache/table/table_route.rs index 2383a1ea13..840e52f8ae 100644 --- a/src/common/meta/src/cache/table/table_route.rs +++ b/src/common/meta/src/cache/table/table_route.rs @@ -49,14 +49,6 @@ impl TableRoute { TableRoute::Logical(_) => None, } } - - /// Returns [LogicalTableRouteValue] reference if it's [TableRoute::Logical]; Otherwise it returns [None]. - pub fn as_logical_table_route_ref(&self) -> Option<&Arc> { - match self { - TableRoute::Physical(_) => None, - TableRoute::Logical(table_route) => Some(table_route), - } - } } /// [TableRouteCache] caches the [TableId] to [TableRoute] mapping. diff --git a/src/common/meta/src/key/table_route.rs b/src/common/meta/src/key/table_route.rs index 96949d2b9f..b5ebf0b4b1 100644 --- a/src/common/meta/src/key/table_route.rs +++ b/src/common/meta/src/key/table_route.rs @@ -290,28 +290,6 @@ impl TableRouteManager { } } - /// Returns the [`PhysicalTableRouteValue`] in the first level, - /// It won't follow the [`LogicalTableRouteValue`] to find the next level [`PhysicalTableRouteValue`]. - /// - /// Returns an error if the first level value is not a [`PhysicalTableRouteValue`]. - pub async fn try_get_physical_table_route( - &self, - table_id: TableId, - ) -> Result> { - match self.storage.get(table_id).await? { - Some(route) => { - ensure!( - route.is_physical(), - UnexpectedLogicalRouteTableSnafu { - err_msg: format!("{route:?} is a non-physical TableRouteValue.") - } - ); - Ok(Some(route.into_physical_table_route())) - } - None => Ok(None), - } - } - /// Returns the [TableId] recursively. /// /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: @@ -569,37 +547,6 @@ impl TableRouteStorage { .transpose() } - /// Returns the physical `DeserializedValueWithBytes` recursively. - /// - /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: - /// - the physical table(`logical_or_physical_table_id`) does not exist - /// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist. 
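Both new functions reduce each row to a single nalgebra call, `add_scalar` for `vec_scalar_add` and `scale` for `vec_scalar_mul`, once the vector literal is decoded. A small sketch of just that arithmetic on plain slices, assuming the same nalgebra API the patch already imports:

```rust
use nalgebra::DVectorView;

fn main() {
    let elems: [f32; 3] = [1.0, 2.0, 3.0];
    // Borrow the decoded f32 slice without copying it.
    let view = DVectorView::from_slice(&elems, elems.len());

    // vec_scalar_add(1, "[1, 2, 3]") -> [2, 3, 4]
    assert_eq!(view.add_scalar(1.0).as_slice().to_vec(), vec![2.0, 3.0, 4.0]);

    // vec_scalar_mul(2, "[1, 2, 3]") -> [2, 4, 6]
    assert_eq!(view.scale(2.0).as_slice().to_vec(), vec![2.0, 4.0, 6.0]);
}
```

Null scalars and null vectors simply propagate to a null output row, and a constant vector argument is decoded once up front via `as_veclit_if_const`.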
- pub async fn get_physical_table_route_with_raw_bytes( - &self, - logical_or_physical_table_id: TableId, - ) -> Result<(TableId, DeserializedValueWithBytes)> { - let table_route = self - .get_with_raw_bytes(logical_or_physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: logical_or_physical_table_id, - })?; - - match table_route.get_inner_ref() { - TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)), - TableRouteValue::Logical(x) => { - let physical_table_id = x.physical_table_id(); - let physical_table_route = self - .get_with_raw_bytes(physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: physical_table_id, - })?; - Ok((physical_table_id, physical_table_route)) - } - } - } - /// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`. pub async fn batch_get(&self, table_ids: &[TableId]) -> Result>> { let mut table_routes = self.batch_get_inner(table_ids).await?; diff --git a/src/common/meta/src/kv_backend/etcd.rs b/src/common/meta/src/kv_backend/etcd.rs index 1cdd45bc5c..a787940b6d 100644 --- a/src/common/meta/src/kv_backend/etcd.rs +++ b/src/common/meta/src/kv_backend/etcd.rs @@ -15,6 +15,7 @@ use std::any::Any; use std::sync::Arc; +use common_telemetry::info; use etcd_client::{ Client, DeleteOptions, GetOptions, PutOptions, Txn, TxnOp, TxnOpResponse, TxnResponse, }; @@ -55,6 +56,7 @@ impl EtcdStore { } pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef { + info!("Connected to etcd"); Arc::new(Self { client, max_txn_ops, diff --git a/src/common/meta/src/rpc/router.rs b/src/common/meta/src/rpc/router.rs index dd7349ae8f..0e700cc6da 100644 --- a/src/common/meta/src/rpc/router.rs +++ b/src/common/meta/src/rpc/router.rs @@ -89,39 +89,6 @@ pub fn convert_to_region_leader_map(region_routes: &[RegionRoute]) -> HashMap>() } -/// Returns the HashMap<[RegionNumber], HashSet> -pub fn convert_to_region_peer_map( - region_routes: &[RegionRoute], -) -> HashMap> { - region_routes - .iter() - .map(|x| { - let set = x - .follower_peers - .iter() - .map(|p| p.id) - .chain(x.leader_peer.as_ref().map(|p| p.id)) - .collect::>(); - - (x.region.id.region_number(), set) - }) - .collect::>() -} - -/// Returns the HashMap<[RegionNumber], [LeaderState]>; -pub fn convert_to_region_leader_state_map( - region_routes: &[RegionRoute], -) -> HashMap { - region_routes - .iter() - .filter_map(|x| { - x.leader_state - .as_ref() - .map(|state| (x.region.id.region_number(), *state)) - }) - .collect::>() -} - pub fn find_region_leader( region_routes: &[RegionRoute], region_number: RegionNumber, @@ -147,19 +114,6 @@ pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Ve .collect() } -pub fn extract_all_peers(region_routes: &[RegionRoute]) -> Vec { - let mut peers = region_routes - .iter() - .flat_map(|x| x.leader_peer.iter().chain(x.follower_peers.iter())) - .collect::>() - .into_iter() - .cloned() - .collect::>(); - peers.sort_by_key(|x| x.id); - - peers -} - impl TableRoute { pub fn new(table: Table, region_routes: Vec) -> Self { let region_leaders = region_routes diff --git a/src/common/procedure/src/local/runner.rs b/src/common/procedure/src/local/runner.rs index c2d15001fb..bf277a0e72 100644 --- a/src/common/procedure/src/local/runner.rs +++ b/src/common/procedure/src/local/runner.rs @@ -544,7 +544,7 @@ mod tests { use common_test_util::temp_dir::create_temp_dir; use futures_util::future::BoxFuture; use futures_util::FutureExt; - use object_store::ObjectStore; + use 
object_store::{EntryMode, ObjectStore}; use tokio::sync::mpsc; use super::*; @@ -578,7 +578,11 @@ mod tests { ) { let dir = proc_path!(procedure_store, "{procedure_id}/"); let lister = object_store.list(&dir).await.unwrap(); - let mut files_in_dir: Vec<_> = lister.into_iter().map(|de| de.name().to_string()).collect(); + let mut files_in_dir: Vec<_> = lister + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .map(|de| de.name().to_string()) + .collect(); files_in_dir.sort_unstable(); assert_eq!(files, files_in_dir); } diff --git a/src/common/recordbatch/src/lib.rs b/src/common/recordbatch/src/lib.rs index 257b6f0973..0281b45749 100644 --- a/src/common/recordbatch/src/lib.rs +++ b/src/common/recordbatch/src/lib.rs @@ -26,7 +26,6 @@ use std::sync::Arc; use adapter::RecordBatchMetrics; use arc_swap::ArcSwapOption; -use datafusion::physical_plan::memory::MemoryStream; pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datatypes::arrow::compute::SortOptions; pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch; @@ -170,19 +169,6 @@ impl RecordBatches { index: 0, }) } - - pub fn into_df_stream(self) -> DfSendableRecordBatchStream { - let df_record_batches = self - .batches - .into_iter() - .map(|batch| batch.into_df_record_batch()) - .collect(); - // unwrap safety: `MemoryStream::try_new` won't fail - Box::pin( - MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None) - .unwrap(), - ) - } } impl IntoIterator for RecordBatches { diff --git a/src/common/runtime/Cargo.toml b/src/common/runtime/Cargo.toml index c249ba221e..7a12a03ba9 100644 --- a/src/common/runtime/Cargo.toml +++ b/src/common/runtime/Cargo.toml @@ -35,8 +35,6 @@ serde_json.workspace = true snafu.workspace = true tempfile.workspace = true tokio.workspace = true -tokio-metrics = "0.3" -tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" } tokio-util.workspace = true [dev-dependencies] diff --git a/src/common/time/src/util.rs b/src/common/time/src/util.rs index 19fe3bc911..ccb9e1bdd0 100644 --- a/src/common/time/src/util.rs +++ b/src/common/time/src/util.rs @@ -29,10 +29,6 @@ pub fn format_utc_datetime(utc: &NaiveDateTime, pattern: &str) -> String { } } -pub fn system_datetime_to_utc(local: &NaiveDateTime) -> LocalResult { - datetime_to_utc(local, get_timezone(None)) -} - /// Cast a [`NaiveDateTime`] with the given timezone. pub fn datetime_to_utc( datetime: &NaiveDateTime, diff --git a/src/datanode/src/error.rs b/src/datanode/src/error.rs index 9fbd46e160..61a4eae128 100644 --- a/src/datanode/src/error.rs +++ b/src/datanode/src/error.rs @@ -193,6 +193,14 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to build http client"))] + BuildHttpClient { + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: reqwest::Error, + }, + #[snafu(display("Missing required field: {}", name))] MissingRequiredField { name: String, @@ -406,9 +414,10 @@ impl ErrorExt for Error { | MissingKvBackend { .. } | TomlFormat { .. } => StatusCode::InvalidArguments, - PayloadNotExist { .. } | Unexpected { .. } | WatchAsyncTaskChange { .. } => { - StatusCode::Unexpected - } + PayloadNotExist { .. } + | Unexpected { .. } + | WatchAsyncTaskChange { .. } + | BuildHttpClient { .. } => StatusCode::Unexpected, AsyncTaskExecute { source, .. 
} => source.status_code(), diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs index c78afe448e..52a1cba982 100644 --- a/src/datanode/src/store.rs +++ b/src/datanode/src/store.rs @@ -32,7 +32,7 @@ use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, O use snafu::prelude::*; use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE}; -use crate::error::{self, CreateDirSnafu, Result}; +use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result}; pub(crate) async fn new_raw_object_store( store: &ObjectStoreConfig, @@ -236,7 +236,8 @@ pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result builder.timeout(config.timeout) }; - HttpClient::build(http_builder).context(error::InitBackendSnafu) + let client = http_builder.build().context(BuildHttpClientSnafu)?; + Ok(HttpClient::with(client)) } struct PrintDetailedError; diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 8f81a0c86f..b3342cc6f5 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -370,6 +370,51 @@ impl ConcreteDataType { _ => None, } } + + /// Return the datatype name in postgres type system + pub fn postgres_datatype_name(&self) -> &'static str { + match self { + &ConcreteDataType::Null(_) => "UNKNOWN", + &ConcreteDataType::Boolean(_) => "BOOL", + &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR", + &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2", + &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4", + &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8", + &ConcreteDataType::Float32(_) => "FLOAT4", + &ConcreteDataType::Float64(_) => "FLOAT8", + &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA", + &ConcreteDataType::String(_) => "VARCHAR", + &ConcreteDataType::Date(_) => "DATE", + &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP", + &ConcreteDataType::Time(_) => "TIME", + &ConcreteDataType::Interval(_) => "INTERVAL", + &ConcreteDataType::Decimal128(_) => "NUMERIC", + &ConcreteDataType::Json(_) => "JSON", + ConcreteDataType::List(list) => match list.item_type() { + &ConcreteDataType::Null(_) => "UNKNOWN", + &ConcreteDataType::Boolean(_) => "_BOOL", + &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR", + &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2", + &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4", + &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "_INT8", + &ConcreteDataType::Float32(_) => "_FLOAT4", + &ConcreteDataType::Float64(_) => "_FLOAT8", + &ConcreteDataType::Binary(_) => "_BYTEA", + &ConcreteDataType::String(_) => "_VARCHAR", + &ConcreteDataType::Date(_) => "_DATE", + &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP", + &ConcreteDataType::Time(_) => "_TIME", + &ConcreteDataType::Interval(_) => "_INTERVAL", + &ConcreteDataType::Decimal128(_) => "_NUMERIC", + &ConcreteDataType::Json(_) => "_JSON", + &ConcreteDataType::Duration(_) + | &ConcreteDataType::Dictionary(_) + | &ConcreteDataType::Vector(_) + | &ConcreteDataType::List(_) => "UNKNOWN", + }, + &ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN", + } + } } impl From<&ConcreteDataType> for ConcreteDataType { diff --git a/src/datatypes/src/error.rs b/src/datatypes/src/error.rs index 705e5d9682..0f5a82e44f 100644 --- a/src/datatypes/src/error.rs +++ 
b/src/datatypes/src/error.rs @@ -232,6 +232,12 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + #[snafu(display("Invalid skipping index option: {}", msg))] + InvalidSkippingIndexOption { + msg: String, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -252,7 +258,8 @@ impl ErrorExt for Error { | InvalidPrecisionOrScale { .. } | InvalidJson { .. } | InvalidVector { .. } - | InvalidFulltextOption { .. } => StatusCode::InvalidArguments, + | InvalidFulltextOption { .. } + | InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments, ValueExceedsPrecision { .. } | CastType { .. } diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 2eaa0254fb..c537a4608b 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -28,10 +28,11 @@ use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; use crate::prelude::ConcreteDataType; pub use crate::schema::column_schema::{ - ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, + ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, - TIME_INDEX_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, + SKIPPING_INDEX_KEY, TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index c1e2df8469..7a96ab5e2b 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -39,12 +39,20 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint"; pub const FULLTEXT_KEY: &str = "greptime:fulltext"; /// Key used to store whether the column has inverted index in arrow field's metadata. pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index"; +/// Key used to store skip options in arrow field's metadata. +pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index"; /// Keys used in fulltext options pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; +/// Keys used in SKIPPING index options +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type"; + +pub const DEFAULT_GRANULARITY: u32 = 10240; + /// Schema of a column, used as an immutable struct. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ColumnSchema { @@ -156,6 +164,10 @@ impl ColumnSchema { .unwrap_or(false) } + pub fn has_fulltext_index_key(&self) -> bool { + self.metadata.contains_key(FULLTEXT_KEY) + } + pub fn has_inverted_index_key(&self) -> bool { self.metadata.contains_key(INVERTED_INDEX_KEY) } @@ -298,6 +310,34 @@ impl ColumnSchema { ); Ok(()) } + + /// Retrieves the skipping index options for the column. 
+ pub fn skipping_index_options(&self) -> Result> { + match self.metadata.get(SKIPPING_INDEX_KEY) { + None => Ok(None), + Some(json) => { + let options = + serde_json::from_str(json).context(error::DeserializeSnafu { json })?; + Ok(Some(options)) + } + } + } + + pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(&options).context(error::SerializeSnafu)?, + ); + Ok(self) + } + + pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(options).context(error::SerializeSnafu)?, + ); + Ok(()) + } } /// Column extended type set in column schema's metadata. @@ -495,6 +535,76 @@ impl fmt::Display for FulltextAnalyzer { } } +/// Skipping options for a column. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] +#[serde(rename_all = "kebab-case")] +pub struct SkippingIndexOptions { + /// The granularity of the skip index. + pub granularity: u32, + /// The type of the skip index. + #[serde(default)] + pub index_type: SkipIndexType, +} + +impl fmt::Display for SkippingIndexOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "granularity={}", self.granularity)?; + write!(f, ", index_type={}", self.index_type)?; + Ok(()) + } +} + +/// Skip index types. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)] +pub enum SkipIndexType { + #[default] + BloomFilter, +} + +impl fmt::Display for SkipIndexType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SkipIndexType::BloomFilter => write!(f, "BLOOM"), + } + } +} + +impl TryFrom> for SkippingIndexOptions { + type Error = Error; + + fn try_from(options: HashMap) -> Result { + // Parse granularity with default value 1 + let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) { + Some(value) => value.parse::().map_err(|_| { + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid granularity: {value}, expected: positive integer"), + } + .build() + })?, + None => DEFAULT_GRANULARITY, + }; + + // Parse index type with default value BloomFilter + let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) { + Some(typ) => match typ.to_ascii_uppercase().as_str() { + "BLOOM" => SkipIndexType::BloomFilter, + _ => { + return error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"), + } + .fail(); + } + }, + None => SkipIndexType::default(), + }; + + Ok(SkippingIndexOptions { + granularity, + index_type, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/file-engine/Cargo.toml b/src/file-engine/Cargo.toml index f9cd1113f5..1a665d6676 100644 --- a/src/file-engine/Cargo.toml +++ b/src/file-engine/Cargo.toml @@ -38,5 +38,4 @@ tokio.workspace = true [dev-dependencies] api.workspace = true -common-procedure-test.workspace = true common-test-util.workspace = true diff --git a/src/file-engine/src/manifest.rs b/src/file-engine/src/manifest.rs index 6310c3ccb9..6bf5ee104b 100644 --- a/src/file-engine/src/manifest.rs +++ b/src/file-engine/src/manifest.rs @@ -46,7 +46,7 @@ impl FileRegionManifest { pub async fn store(&self, region_dir: &str, object_store: &ObjectStore) -> Result<()> { let path = ®ion_manifest_path(region_dir); let exist = object_store - .is_exist(path) + .exists(path) .await 
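The `TryFrom<HashMap<String, String>>` impl above accepts the two option keys and falls back to `DEFAULT_GRANULARITY` (10240) and `BloomFilter` when they are absent (the inline comment mentioning a default of 1 is stale; the constant is 10240). A sketch of how the parsing behaves, assuming it runs somewhere the `datatypes` crate is a dependency:

```rust
use std::collections::HashMap;

use datatypes::schema::SkippingIndexOptions;

fn main() {
    // Explicit options; the index type is matched case-insensitively.
    let opts = SkippingIndexOptions::try_from(HashMap::from([
        ("granularity".to_string(), "8192".to_string()),
        ("type".to_string(), "bloom".to_string()),
    ]))
    .unwrap();
    assert_eq!(opts.granularity, 8192);
    assert_eq!(opts.to_string(), "granularity=8192, index_type=BLOOM");

    // An empty map falls back to the defaults: granularity 10240, BLOOM.
    let defaults = SkippingIndexOptions::try_from(HashMap::new()).unwrap();
    assert_eq!(defaults.granularity, 10240);
}
```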
.context(CheckObjectSnafu { path })?; ensure!(!exist, ManifestExistsSnafu { path }); diff --git a/src/file-engine/src/region.rs b/src/file-engine/src/region.rs index a5af682228..673d352b1e 100644 --- a/src/file-engine/src/region.rs +++ b/src/file-engine/src/region.rs @@ -130,7 +130,7 @@ mod tests { assert_eq!(region.metadata.primary_key, vec![1]); assert!(object_store - .is_exist("create_region_dir/manifest/_file_manifest") + .exists("create_region_dir/manifest/_file_manifest") .await .unwrap()); @@ -198,13 +198,13 @@ mod tests { .unwrap(); assert!(object_store - .is_exist("drop_region_dir/manifest/_file_manifest") + .exists("drop_region_dir/manifest/_file_manifest") .await .unwrap()); FileRegion::drop(®ion, &object_store).await.unwrap(); assert!(!object_store - .is_exist("drop_region_dir/manifest/_file_manifest") + .exists("drop_region_dir/manifest/_file_manifest") .await .unwrap()); diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index ed2a1dc1c4..ffba0618da 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -47,7 +47,6 @@ hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "m itertools.workspace = true lazy_static.workspace = true meta-client.workspace = true -minstant = "0.1.7" nom = "7.1.3" num-traits = "0.2" operator.workspace = true diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 80d03e2770..7d9ae5e422 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -206,28 +206,6 @@ impl DiffRequest { } } -/// iterate through the diff row and form continuous diff row with same diff type -pub fn diff_row_to_request(rows: Vec) -> Vec { - let mut reqs = Vec::new(); - for (row, ts, diff) in rows { - let last = reqs.last_mut(); - match (last, diff) { - (Some(DiffRequest::Insert(rows)), 1) => { - rows.push((row, ts)); - } - (Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])), - (Some(DiffRequest::Delete(rows)), -1) => { - rows.push((row, ts)); - } - (Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])), - (None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])), - (None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])), - _ => {} - } - } - reqs -} - pub fn batches_to_rows_req(batches: Vec) -> Result, Error> { let mut reqs = Vec::new(); for batch in batches { diff --git a/src/flow/src/compute/render/src_sink.rs b/src/flow/src/compute/render/src_sink.rs index 62e733420b..cc8cf01ff7 100644 --- a/src/flow/src/compute/render/src_sink.rs +++ b/src/flow/src/compute/render/src_sink.rs @@ -14,7 +14,7 @@ //! 
Source and Sink for the dataflow -use std::collections::{BTreeMap, VecDeque}; +use std::collections::BTreeMap; use common_telemetry::{debug, trace}; use hydroflow::scheduled::graph_ext::GraphExt; @@ -28,7 +28,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff}; use crate::error::{Error, PlanSnafu}; use crate::expr::error::InternalSnafu; use crate::expr::{Batch, EvalError}; -use crate::repr::{DiffRow, Row, BROADCAST_CAP}; +use crate::repr::{DiffRow, Row}; #[allow(clippy::mutable_key_type)] impl Context<'_, '_> { @@ -242,44 +242,4 @@ impl Context<'_, '_> { }, ); } - - /// Render a sink which send updates to broadcast channel, have internal buffer in case broadcast channel is full - pub fn render_sink(&mut self, bundle: CollectionBundle, sender: broadcast::Sender) { - let CollectionBundle { - collection, - arranged: _, - } = bundle; - let mut buf = VecDeque::with_capacity(1000); - - let schd = self.compute_state.get_scheduler(); - let inner_schd = schd.clone(); - let now = self.compute_state.current_time_ref(); - - let sink = self - .df - .add_subgraph_sink("Sink", collection.into_inner(), move |_ctx, recv| { - let data = recv.take_inner(); - buf.extend(data.into_iter().flat_map(|i| i.into_iter())); - if sender.len() >= BROADCAST_CAP { - return; - } else { - while let Some(row) = buf.pop_front() { - // if the sender is full, stop sending - if sender.len() >= BROADCAST_CAP { - break; - } - // TODO(discord9): handling tokio broadcast error - let _ = sender.send(row); - } - } - - // if buffer is not empty, schedule the next run at next tick - // so the buffer can be drained as soon as possible - if !buf.is_empty() { - inner_schd.schedule_at(*now.borrow() + 1); - } - }); - - schd.set_cur_subgraph(sink); - } } diff --git a/src/flow/src/compute/types.rs b/src/flow/src/compute/types.rs index 00ed660a6e..e125a2d272 100644 --- a/src/flow/src/compute/types.rs +++ b/src/flow/src/compute/types.rs @@ -82,22 +82,6 @@ impl Arranged { writer: self.writer.clone(), }) } - - /// Copy the full arrangement, including the future and the current updates. - /// - /// Internally `Rc-ed` so it's cheap to copy - pub fn try_copy_full(&self) -> Option { - self.arrangement - .clone_full_arrange() - .map(|arrangement| Arranged { - arrangement, - readers: self.readers.clone(), - writer: self.writer.clone(), - }) - } - pub fn add_reader(&self, id: SubgraphId) { - self.readers.borrow_mut().push(id) - } } /// A bundle of the various ways a collection can be represented. diff --git a/src/flow/src/expr/error.rs b/src/flow/src/expr/error.rs index 4b69b3df23..992d5c5921 100644 --- a/src/flow/src/expr/error.rs +++ b/src/flow/src/expr/error.rs @@ -21,11 +21,6 @@ use datafusion_common::DataFusionError; use datatypes::data_type::ConcreteDataType; use snafu::{Location, Snafu}; -fn is_send_sync() { - fn check() {} - check::(); -} - /// EvalError is about errors happen on columnar evaluation /// /// TODO(discord9): add detailed location of column/operator(instead of code) to errors tp help identify related column diff --git a/src/flow/src/expr/linear.rs b/src/flow/src/expr/linear.rs index 8e220f7d86..373e467aba 100644 --- a/src/flow/src/expr/linear.rs +++ b/src/flow/src/expr/linear.rs @@ -359,14 +359,6 @@ impl MapFilterProject { ) } - /// Convert the `MapFilterProject` into a staged evaluation plan. - /// - /// The main behavior is extract temporal predicates, which cannot be evaluated - /// using the standard machinery. 
- pub fn into_plan(self) -> Result { - MfpPlan::create_from(self) - } - /// Lists input columns whose values are used in outputs. /// /// It is entirely appropriate to determine the demand of an instance @@ -602,26 +594,6 @@ impl SafeMfpPlan { } } - /// A version of `evaluate` which produces an iterator over `Datum` - /// as output. - /// - /// This version can be useful when one wants to capture the resulting - /// datums without packing and then unpacking a row. - #[inline(always)] - pub fn evaluate_iter<'a>( - &'a self, - datums: &'a mut Vec, - ) -> Result + 'a>, EvalError> { - let passed_predicates = self.evaluate_inner(datums)?; - if !passed_predicates { - Ok(None) - } else { - Ok(Some( - self.mfp.projection.iter().map(move |i| datums[*i].clone()), - )) - } - } - /// Populates `values` with `self.expressions` and tests `self.predicates`. /// /// This does not apply `self.projection`, which is up to the calling method. diff --git a/src/flow/src/plan.rs b/src/flow/src/plan.rs index dc86b984ed..e1cf22e621 100644 --- a/src/flow/src/plan.rs +++ b/src/flow/src/plan.rs @@ -18,10 +18,8 @@ mod join; mod reduce; -use std::collections::BTreeSet; - use crate::error::Error; -use crate::expr::{GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr}; +use crate::expr::{Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr}; use crate::plan::join::JoinPlan; pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan}; use crate::repr::{DiffRow, RelationDesc}; @@ -186,48 +184,6 @@ pub enum Plan { }, } -impl Plan { - /// Find all the used collection in the plan - pub fn find_used_collection(&self) -> BTreeSet { - fn recur_find_use(plan: &Plan, used: &mut BTreeSet) { - match plan { - Plan::Get { id } => { - match id { - Id::Local(_) => (), - Id::Global(g) => { - used.insert(*g); - } - }; - } - Plan::Let { value, body, .. } => { - recur_find_use(&value.plan, used); - recur_find_use(&body.plan, used); - } - Plan::Mfp { input, .. } => { - recur_find_use(&input.plan, used); - } - Plan::Reduce { input, .. } => { - recur_find_use(&input.plan, used); - } - Plan::Join { inputs, .. } => { - for input in inputs { - recur_find_use(&input.plan, used); - } - } - Plan::Union { inputs, .. 
} => { - for input in inputs { - recur_find_use(&input.plan, used); - } - } - _ => {} - } - } - let mut ret = Default::default(); - recur_find_use(self, &mut ret); - ret - } -} - impl Plan { pub fn with_types(self, schema: RelationDesc) -> TypedPlan { TypedPlan { schema, plan: self } diff --git a/src/flow/src/repr/relation.rs b/src/flow/src/repr/relation.rs index 54ad1c5e8e..d0fbb861eb 100644 --- a/src/flow/src/repr/relation.rs +++ b/src/flow/src/repr/relation.rs @@ -46,14 +46,6 @@ impl Key { self.column_indices.push(col); } - /// Add columns to Key - pub fn add_cols(&mut self, cols: I) - where - I: IntoIterator, - { - self.column_indices.extend(cols); - } - /// Remove a column from Key pub fn remove_col(&mut self, col: usize) { self.column_indices.retain(|&r| r != col); diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 01f06eb033..e21819c568 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -25,7 +25,6 @@ common-catalog.workspace = true common-config.workspace = true common-datasource.workspace = true common-error.workspace = true -common-frontend.workspace = true common-function.workspace = true common-grpc.workspace = true common-macro.workspace = true @@ -71,7 +70,6 @@ common-test-util.workspace = true datanode.workspace = true datatypes.workspace = true futures = "0.3" -meta-srv = { workspace = true, features = ["mock"] } serde_json.workspace = true strfmt = "0.2" tower.workspace = true diff --git a/src/frontend/src/instance/log_handler.rs b/src/frontend/src/instance/log_handler.rs index c3422066a3..2da2d6717d 100644 --- a/src/frontend/src/instance/log_handler.rs +++ b/src/frontend/src/instance/log_handler.rs @@ -19,14 +19,16 @@ use async_trait::async_trait; use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq}; use client::Output; use common_error::ext::BoxedError; +use pipeline::pipeline_operator::PipelineOperator; use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion}; use servers::error::{ AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult, }; use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef}; use servers::query_handler::PipelineHandler; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextRef}; use snafu::ResultExt; +use table::Table; use crate::instance::Instance; @@ -84,6 +86,22 @@ impl PipelineHandler for Instance { .await .context(PipelineSnafu) } + + async fn get_table( + &self, + table: &str, + query_ctx: &QueryContext, + ) -> std::result::Result>, catalog::error::Error> { + let catalog = query_ctx.current_catalog(); + let schema = query_ctx.current_schema(); + self.catalog_manager + .table(catalog, &schema, table, None) + .await + } + + fn build_pipeline(&self, pipeline: &str) -> ServerResult> { + PipelineOperator::build_pipeline(pipeline).context(PipelineSnafu) + } } impl Instance { diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 772177147a..f46c64a176 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -17,6 +17,7 @@ common-error.workspace = true common-macro.workspace = true common-runtime.workspace = true common-telemetry.workspace = true +fastbloom = "0.8" fst.workspace = true futures.workspace = true greptime-proto.workspace = true @@ -26,6 +27,7 @@ prost.workspace = true regex.workspace = true regex-automata.workspace = true serde.workspace = true +serde_json.workspace = true snafu.workspace = true tantivy = { version = "0.22", features = ["zstd-compression"] } 
tantivy-jieba = "0.11.0" diff --git a/src/index/src/bloom_filter.rs b/src/index/src/bloom_filter.rs new file mode 100644 index 0000000000..e68acc698a --- /dev/null +++ b/src/index/src/bloom_filter.rs @@ -0,0 +1,53 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::{Deserialize, Serialize}; + +pub mod creator; +mod error; + +pub type Bytes = Vec; +pub type BytesRef<'a> = &'a [u8]; + +/// The Meta information of the bloom filter stored in the file. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct BloomFilterMeta { + /// The number of rows per segment. + pub rows_per_segment: usize, + + /// The number of segments. + pub seg_count: usize, + + /// The number of total rows. + pub row_count: usize, + + /// The size of the bloom filter excluding the meta information. + pub bloom_filter_segments_size: usize, + + /// Offset and size of bloom filters in the file. + pub bloom_filter_segments: Vec, +} + +/// The location of the bloom filter segment in the file. +#[derive(Debug, Serialize, Deserialize)] +pub struct BloomFilterSegmentLocation { + /// The offset of the bloom filter segment in the file. + pub offset: u64, + + /// The size of the bloom filter segment in the file. + pub size: u64, + + /// The number of elements in the bloom filter segment. + pub elem_count: usize, +} diff --git a/src/index/src/bloom_filter/creator.rs b/src/index/src/bloom_filter/creator.rs new file mode 100644 index 0000000000..b3c95d3a76 --- /dev/null +++ b/src/index/src/bloom_filter/creator.rs @@ -0,0 +1,294 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use fastbloom::BloomFilter; +use futures::{AsyncWrite, AsyncWriteExt}; +use snafu::ResultExt; + +use super::error::{IoSnafu, SerdeJsonSnafu}; +use crate::bloom_filter::error::Result; +use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes}; + +/// The seed used for the Bloom filter. +const SEED: u128 = 42; + +/// The false positive rate of the Bloom filter. +const FALSE_POSITIVE_RATE: f64 = 0.01; + +/// `BloomFilterCreator` is responsible for creating and managing bloom filters +/// for a set of elements. It divides the rows into segments and creates +/// bloom filters for each segment. 
+/// +/// # Format +/// +/// The bloom filter creator writes the following format to the writer: +/// +/// ```text +/// +--------------------+--------------------+-----+----------------------+----------------------+ +/// | Bloom filter 0 | Bloom filter 1 | ... | BloomFilterMeta | Meta size | +/// +--------------------+--------------------+-----+----------------------+----------------------+ +/// |<- bytes (size 0) ->|<- bytes (size 1) ->| ... |<- json (meta size) ->|<- u32 LE (4 bytes) ->| +/// ``` +/// +pub struct BloomFilterCreator { + /// The number of rows per segment set by the user. + rows_per_segment: usize, + + /// Row count that added to the bloom filter so far. + accumulated_row_count: usize, + + /// A set of distinct elements in the current segment. + cur_seg_distinct_elems: HashSet, + + /// The memory usage of the current segment's distinct elements. + cur_seg_distinct_elems_mem_usage: usize, + + /// Storage for finalized Bloom filters. + finalized_bloom_filters: FinalizedBloomFilterStorage, +} + +impl BloomFilterCreator { + /// Creates a new `BloomFilterCreator` with the specified number of rows per segment. + /// + /// # PANICS + /// + /// `rows_per_segment` <= 0 + pub fn new(rows_per_segment: usize) -> Self { + assert!( + rows_per_segment > 0, + "rows_per_segment must be greater than 0" + ); + + Self { + rows_per_segment, + accumulated_row_count: 0, + cur_seg_distinct_elems: HashSet::default(), + cur_seg_distinct_elems_mem_usage: 0, + finalized_bloom_filters: FinalizedBloomFilterStorage::default(), + } + } + + /// Adds a row of elements to the bloom filter. If the number of accumulated rows + /// reaches `rows_per_segment`, it finalizes the current segment. + pub fn push_row_elems(&mut self, elems: impl IntoIterator) { + self.accumulated_row_count += 1; + for elem in elems.into_iter() { + let len = elem.len(); + let is_new = self.cur_seg_distinct_elems.insert(elem); + if is_new { + self.cur_seg_distinct_elems_mem_usage += len; + } + } + + if self.accumulated_row_count % self.rows_per_segment == 0 { + self.finalize_segment(); + } + } + + /// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer. + pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> { + if !self.cur_seg_distinct_elems.is_empty() { + self.finalize_segment(); + } + + let mut meta = BloomFilterMeta { + rows_per_segment: self.rows_per_segment, + seg_count: self.finalized_bloom_filters.len(), + row_count: self.accumulated_row_count, + ..Default::default() + }; + + let mut buf = Vec::new(); + for segment in self.finalized_bloom_filters.drain() { + let slice = segment.bloom_filter.as_slice(); + buf.clear(); + write_u64_slice(&mut buf, slice); + writer.write_all(&buf).await.context(IoSnafu)?; + + let size = buf.len(); + meta.bloom_filter_segments.push(BloomFilterSegmentLocation { + offset: meta.bloom_filter_segments_size as _, + size: size as _, + elem_count: segment.element_count, + }); + meta.bloom_filter_segments_size += size; + } + + let meta_bytes = serde_json::to_vec(&meta).context(SerdeJsonSnafu)?; + writer.write_all(&meta_bytes).await.context(IoSnafu)?; + + let meta_size = meta_bytes.len() as u32; + writer + .write_all(&meta_size.to_le_bytes()) + .await + .context(IoSnafu)?; + writer.flush().await.unwrap(); + + Ok(()) + } + + /// Returns the memory usage of the creating bloom filter. 
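Segment bookkeeping: `push_row_elems` seals a segment every `rows_per_segment` rows and `finish` seals whatever partial segment remains, so the number of emitted bloom filters is the row count divided by `rows_per_segment`, rounded up (assuming every row contributes at least one element). A tiny check of that arithmetic:

```rust
fn main() {
    // Expected segment count for a given number of pushed rows,
    // with rows_per_segment = 2: ceil(row_count / 2).
    let rows_per_segment: usize = 2;
    for (row_count, expected_segments) in [(0usize, 0usize), (2, 1), (3, 2), (10, 5), (11, 6)] {
        assert_eq!(row_count.div_ceil(rows_per_segment), expected_segments);
    }
}
```

This matches the unit test below, where three rows with `rows_per_segment = 2` produce two segments.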
+ pub fn memory_usage(&self) -> usize { + self.cur_seg_distinct_elems_mem_usage + self.finalized_bloom_filters.memory_usage() + } + + fn finalize_segment(&mut self) { + let elem_count = self.cur_seg_distinct_elems.len(); + self.finalized_bloom_filters + .add(self.cur_seg_distinct_elems.drain(), elem_count); + self.cur_seg_distinct_elems_mem_usage = 0; + } +} + +/// Storage for finalized Bloom filters. +/// +/// TODO(zhongzc): Add support for storing intermediate bloom filters on disk to control memory usage. +#[derive(Debug, Default)] +struct FinalizedBloomFilterStorage { + /// Bloom filters that are stored in memory. + in_memory: Vec, +} + +impl FinalizedBloomFilterStorage { + fn memory_usage(&self) -> usize { + self.in_memory.iter().map(|s| s.size).sum() + } + + /// Adds a new finalized Bloom filter to the storage. + /// + /// TODO(zhongzc): Add support for flushing to disk. + fn add(&mut self, elems: impl IntoIterator, elem_count: usize) { + let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE) + .seed(&SEED) + .expected_items(elem_count); + for elem in elems.into_iter() { + bf.insert(&elem); + } + + let cbf = FinalizedBloomFilterSegment::new(bf, elem_count); + self.in_memory.push(cbf); + } + + fn len(&self) -> usize { + self.in_memory.len() + } + + fn drain(&mut self) -> impl Iterator + '_ { + self.in_memory.drain(..) + } +} + +/// A finalized Bloom filter segment. +#[derive(Debug)] +struct FinalizedBloomFilterSegment { + /// The underlying Bloom filter. + bloom_filter: BloomFilter, + + /// The number of elements in the Bloom filter. + element_count: usize, + + /// The occupied memory size of the Bloom filter. + size: usize, +} + +impl FinalizedBloomFilterSegment { + fn new(bloom_filter: BloomFilter, elem_count: usize) -> Self { + let memory_usage = std::mem::size_of_val(bloom_filter.as_slice()); + Self { + bloom_filter, + element_count: elem_count, + size: memory_usage, + } + } +} + +/// Writes a slice of `u64` to the buffer in little-endian order. 
+fn write_u64_slice(buf: &mut Vec, slice: &[u64]) { + buf.reserve(std::mem::size_of_val(slice)); + for &x in slice { + buf.extend_from_slice(&x.to_le_bytes()); + } +} + +#[cfg(test)] +mod tests { + use futures::io::Cursor; + + use super::*; + + fn u64_vec_from_bytes(bytes: &[u8]) -> Vec { + bytes + .chunks_exact(std::mem::size_of::()) + .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap())) + .collect() + } + + #[tokio::test] + async fn test_bloom_filter_creator() { + let mut writer = Cursor::new(Vec::new()); + let mut creator = BloomFilterCreator::new(2); + + creator.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()]); + assert!(creator.cur_seg_distinct_elems_mem_usage > 0); + assert!(creator.memory_usage() > 0); + + creator.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()]); + // Finalize the first segment + assert!(creator.cur_seg_distinct_elems_mem_usage == 0); + assert!(creator.memory_usage() > 0); + + creator.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()]); + assert!(creator.cur_seg_distinct_elems_mem_usage > 0); + assert!(creator.memory_usage() > 0); + + creator.finish(&mut writer).await.unwrap(); + + let bytes = writer.into_inner(); + let total_size = bytes.len(); + let meta_size_offset = total_size - 4; + let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap()); + + let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4]; + let meta: BloomFilterMeta = serde_json::from_slice(meta_bytes).unwrap(); + + assert_eq!(meta.rows_per_segment, 2); + assert_eq!(meta.seg_count, 2); + assert_eq!(meta.row_count, 3); + assert_eq!( + meta.bloom_filter_segments_size + meta_bytes.len() + 4, + total_size + ); + + let mut bfs = Vec::new(); + for segment in meta.bloom_filter_segments { + let bloom_filter_bytes = + &bytes[segment.offset as usize..(segment.offset + segment.size) as usize]; + let v = u64_vec_from_bytes(bloom_filter_bytes); + let bloom_filter = BloomFilter::from_vec(v) + .seed(&SEED) + .expected_items(segment.elem_count); + bfs.push(bloom_filter); + } + + assert_eq!(bfs.len(), 2); + assert!(bfs[0].contains(&b"a")); + assert!(bfs[0].contains(&b"b")); + assert!(bfs[0].contains(&b"c")); + assert!(bfs[0].contains(&b"d")); + assert!(bfs[1].contains(&b"e")); + assert!(bfs[1].contains(&b"f")); + } +} diff --git a/src/index/src/bloom_filter/error.rs b/src/index/src/bloom_filter/error.rs new file mode 100644 index 0000000000..8e95dc5225 --- /dev/null +++ b/src/index/src/bloom_filter/error.rs @@ -0,0 +1,66 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
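Given the layout documented above (bloom filter segments, then the JSON meta, then a 4-byte little-endian meta size), a reader can locate the meta without any extra index: read the trailing 4 bytes, then slice the JSON immediately before them. A sketch of that lookup on a finished blob; the helper name is illustrative, and the returned bytes are assumed to deserialize into `BloomFilterMeta` with `serde_json`, as the unit test above does:

```rust
/// Return the JSON-encoded `BloomFilterMeta` bytes stored at the tail of a
/// finished bloom filter blob. Panics if the blob is shorter than 4 bytes or
/// advertises an impossible meta size; a real reader should validate both.
fn meta_bytes(blob: &[u8]) -> &[u8] {
    let meta_end = blob.len() - 4;
    let meta_size = u32::from_le_bytes(blob[meta_end..].try_into().unwrap()) as usize;
    &blob[meta_end - meta_size..meta_end]
}
```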
+ +use std::any::Any; + +use common_error::ext::{BoxedError, ErrorExt}; +use common_error::status_code::StatusCode; +use common_macro::stack_trace_debug; +use snafu::{Location, Snafu}; + +#[derive(Snafu)] +#[snafu(visibility(pub))] +#[stack_trace_debug] +pub enum Error { + #[snafu(display("IO error"))] + Io { + #[snafu(source)] + error: std::io::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to serde json"))] + SerdeJson { + #[snafu(source)] + error: serde_json::error::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("External error"))] + External { + source: BoxedError, + #[snafu(implicit)] + location: Location, + }, +} + +impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + use Error::*; + + match self { + Io { .. } | Self::SerdeJson { .. } => StatusCode::Unexpected, + + External { source, .. } => source.status_code(), + } + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +pub type Result = std::result::Result; diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 07a42b8b87..7e861beda6 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -26,14 +26,6 @@ use crate::inverted_index::search::predicate::Predicate; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Failed to seek"))] - Seek { - #[snafu(source)] - error: IoError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to read"))] Read { #[snafu(source)] @@ -76,6 +68,18 @@ pub enum Error { location: Location, }, + #[snafu(display("Blob size too small"))] + BlobSizeTooSmall { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid footer payload size"))] + InvalidFooterPayloadSize { + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Unexpected inverted index footer payload size, max: {max_payload_size}, actual: {actual_payload_size}"))] UnexpectedFooterPayloadSize { max_payload_size: u64, @@ -215,8 +219,7 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - Seek { .. } - | Read { .. } + Read { .. } | Write { .. } | Flush { .. } | Close { .. } @@ -229,7 +232,9 @@ impl ErrorExt for Error { | KeysApplierUnexpectedPredicates { .. } | CommonIo { .. } | UnknownIntermediateCodecMagic { .. } - | FstCompile { .. } => StatusCode::Unexpected, + | FstCompile { .. } + | InvalidFooterPayloadSize { .. } + | BlobSizeTooSmall { .. } => StatusCode::Unexpected, ParseRegex { .. } | ParseDFA { .. } diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index a6fb0cecbf..21e5487d1e 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use common_base::BitVec; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::ResultExt; @@ -30,23 +32,23 @@ mod footer; #[mockall::automock] #[async_trait] pub trait InvertedIndexReader: Send { - /// Reads all data to dest. - async fn read_all(&mut self, dest: &mut Vec) -> Result; - /// Seeks to given offset and reads data with exact size as provided. 
- async fn seek_read(&mut self, offset: u64, size: u32) -> Result>; + async fn range_read(&mut self, offset: u64, size: u32) -> Result>; + + /// Reads the bytes in the given ranges. + async fn read_vec(&mut self, ranges: &[Range]) -> Result>; /// Retrieves metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result>; /// Retrieves the finite state transducer (FST) map from the given offset and size. async fn fst(&mut self, offset: u64, size: u32) -> Result { - let fst_data = self.seek_read(offset, size).await?; + let fst_data = self.range_read(offset, size).await?; FstMap::new(fst_data).context(DecodeFstSnafu) } /// Retrieves the bitmap from the given offset and size. async fn bitmap(&mut self, offset: u64, size: u32) -> Result { - self.seek_read(offset, size).await.map(BitVec::from_vec) + self.range_read(offset, size).await.map(BitVec::from_vec) } } diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index ace0e5c485..fcaa63773d 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -12,15 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use common_base::range_read::RangeReader; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; +use super::footer::DEFAULT_PREFETCH_SIZE; use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu}; -use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader; +use crate::inverted_index::format::reader::footer::InvertedIndexFooterReader; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::format::MIN_BLOB_SIZE; @@ -49,16 +52,7 @@ impl InvertedIndexBlobReader { #[async_trait] impl InvertedIndexReader for InvertedIndexBlobReader { - async fn read_all(&mut self, dest: &mut Vec) -> Result { - let metadata = self.source.metadata().await.context(CommonIoSnafu)?; - self.source - .read_into(0..metadata.content_length, dest) - .await - .context(CommonIoSnafu)?; - Ok(metadata.content_length as usize) - } - - async fn seek_read(&mut self, offset: u64, size: u32) -> Result> { + async fn range_read(&mut self, offset: u64, size: u32) -> Result> { let buf = self .source .read(offset..offset + size as u64) @@ -67,12 +61,17 @@ impl InvertedIndexReader for InvertedIndexBlobReader { Ok(buf.into()) } + async fn read_vec(&mut self, ranges: &[Range]) -> Result> { + self.source.read_vec(ranges).await.context(CommonIoSnafu) + } + async fn metadata(&mut self) -> Result> { let metadata = self.source.metadata().await.context(CommonIoSnafu)?; let blob_size = metadata.content_length; Self::validate_blob_size(blob_size)?; - let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size); + let mut footer_reader = InvertedIndexFooterReader::new(&mut self.source, blob_size) + .with_prefetch_size(DEFAULT_PREFETCH_SIZE); footer_reader.metadata().await.map(Arc::new) } } diff --git a/src/index/src/inverted_index/format/reader/footer.rs b/src/index/src/inverted_index/format/reader/footer.rs index 1f35237711..c025ecf52e 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -18,53 +18,88 @@ use prost::Message; use snafu::{ensure, ResultExt}; use 
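With `seek_read` replaced by `range_read` and the new batched `read_vec`, callers that previously issued one seek-and-read per section can fetch several byte ranges in a single call. A hedged sketch of how a caller inside this crate might batch the FST and bitmap reads, assuming `read_vec` returns buffers in request order (as the blob reader's delegation to the underlying `RangeReader` suggests):

```rust
use std::ops::Range;

use bytes::Bytes;

use crate::inverted_index::error::Result;
use crate::inverted_index::format::reader::InvertedIndexReader;

/// Fetch the FST and bitmap sections in one round trip instead of two
/// separate `range_read` calls.
async fn read_fst_and_bitmap(
    reader: &mut impl InvertedIndexReader,
    fst_range: Range<u64>,
    bitmap_range: Range<u64>,
) -> Result<(Bytes, Bytes)> {
    let mut parts = reader.read_vec(&[fst_range, bitmap_range]).await?;
    let bitmap = parts.pop().expect("requested two ranges");
    let fst = parts.pop().expect("requested two ranges");
    Ok((fst, bitmap))
}
```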
crate::inverted_index::error::{ - CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu, - UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu, + BlobSizeTooSmallSnafu, CommonIoSnafu, DecodeProtoSnafu, InvalidFooterPayloadSizeSnafu, Result, + UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu, + UnexpectedZeroSegmentRowCountSnafu, }; use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE; -/// InvertedIndeFooterReader is for reading the footer section of the blob. -pub struct InvertedIndeFooterReader { +pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB + +/// InvertedIndexFooterReader is for reading the footer section of the blob. +pub struct InvertedIndexFooterReader { source: R, blob_size: u64, + prefetch_size: Option, } -impl InvertedIndeFooterReader { +impl InvertedIndexFooterReader { pub fn new(source: R, blob_size: u64) -> Self { - Self { source, blob_size } + Self { + source, + blob_size, + prefetch_size: None, + } + } + + /// Set the prefetch size for the footer reader. + pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self { + self.prefetch_size = Some(prefetch_size.max(FOOTER_PAYLOAD_SIZE_SIZE)); + self + } + + pub fn prefetch_size(&self) -> u64 { + self.prefetch_size.unwrap_or(FOOTER_PAYLOAD_SIZE_SIZE) } } -impl InvertedIndeFooterReader { +impl InvertedIndexFooterReader { pub async fn metadata(&mut self) -> Result { - let payload_size = self.read_payload_size().await?; - let metas = self.read_payload(payload_size).await?; - Ok(metas) - } + ensure!( + self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE, + BlobSizeTooSmallSnafu + ); - async fn read_payload_size(&mut self) -> Result { - let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize]; - let end = self.blob_size; - let start = end - FOOTER_PAYLOAD_SIZE_SIZE; - self.source - .read_into(start..end, &mut &mut size_buf[..]) + let footer_start = self.blob_size.saturating_sub(self.prefetch_size()); + let suffix = self + .source + .read(footer_start..self.blob_size) .await .context(CommonIoSnafu)?; + let suffix_len = suffix.len(); + let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64; + self.validate_payload_size(length)?; - let payload_size = u32::from_le_bytes(size_buf) as u64; - self.validate_payload_size(payload_size)?; + let footer_size = FOOTER_PAYLOAD_SIZE_SIZE; - Ok(payload_size) + // Did not fetch the entire file metadata in the initial read, need to make a second request. 
+ if length > suffix_len as u64 - footer_size { + let metadata_start = self.blob_size - length - footer_size; + let meta = self + .source + .read(metadata_start..self.blob_size - footer_size) + .await + .context(CommonIoSnafu)?; + self.parse_payload(&meta, length) + } else { + let metadata_start = self.blob_size - length - footer_size - footer_start; + let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize]; + self.parse_payload(meta, length) + } } - async fn read_payload(&mut self, payload_size: u64) -> Result { - let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE; - let start = end - payload_size; - let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?; + fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> { + let suffix_len = suffix.len(); + ensure!(suffix_len >= 4, InvalidFooterPayloadSizeSnafu); + let mut bytes = [0; 4]; + bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]); - let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?; + Ok(bytes) + } + + fn parse_payload(&mut self, bytes: &[u8], payload_size: u64) -> Result { + let metas = InvertedIndexMetas::decode(bytes).context(DecodeProtoSnafu)?; self.validate_metas(&metas, payload_size)?; - Ok(metas) } @@ -113,9 +148,12 @@ impl InvertedIndeFooterReader { #[cfg(test)] mod tests { + use std::assert_matches::assert_matches; + use prost::Message; use super::*; + use crate::inverted_index::error::Error; fn create_test_payload(meta: InvertedIndexMeta) -> Vec { let mut metas = InvertedIndexMetas { @@ -141,14 +179,18 @@ mod tests { let mut payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size = reader.read_payload_size().await.unwrap(); - let metas = reader.read_payload(payload_size).await.unwrap(); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } - assert_eq!(metas.metas.len(), 1); - let index_meta = &metas.metas.get("test").unwrap(); - assert_eq!(index_meta.name, "test"); + let metas = reader.metadata().await.unwrap(); + assert_eq!(metas.metas.len(), 1); + let index_meta = &metas.metas.get("test").unwrap(); + assert_eq!(index_meta.name, "test"); + } } #[tokio::test] @@ -157,14 +199,20 @@ mod tests { name: "test".to_string(), ..Default::default() }; - let mut payload_buf = create_test_payload(meta); payload_buf.push(0xff); // Add an extra byte to corrupt the footer let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size_result = reader.read_payload_size().await; - assert!(payload_size_result.is_err()); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let blob_size = payload_buf.len() as u64; + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } + + let result = reader.metadata().await; + assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. 
})); + } } #[tokio::test] @@ -178,10 +226,15 @@ mod tests { let mut payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size = reader.read_payload_size().await.unwrap(); - let payload_result = reader.read_payload(payload_size).await; - assert!(payload_result.is_err()); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } + + let result = reader.metadata().await; + assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. })); + } } } diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index 197fc01818..e52a93138f 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -13,6 +13,8 @@ // limitations under the License. #![feature(iter_partition_in_place)] +#![feature(assert_matches)] +pub mod bloom_filter; pub mod fulltext_index; pub mod inverted_index; diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index da614ac9b9..c7dcd81e9f 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -204,10 +204,6 @@ impl Context { pub fn reset_in_memory(&self) { self.in_memory.reset(); } - - pub fn reset_leader_cached_kv_backend(&self) { - self.leader_cached_kv_backend.reset(); - } } /// The value of the leader. It is used to store the leader's address. diff --git a/src/meta-srv/src/mocks.rs b/src/meta-srv/src/mocks.rs index cf9144dc39..9611fcdd13 100644 --- a/src/meta-srv/src/mocks.rs +++ b/src/meta-srv/src/mocks.rs @@ -52,11 +52,6 @@ pub async fn mock_with_etcdstore(addr: &str) -> MockInfo { mock(Default::default(), kv_backend, None, None, None).await } -pub async fn mock_with_memstore_and_selector(selector: SelectorRef) -> MockInfo { - let kv_backend = Arc::new(MemoryKvBackend::new()); - mock(Default::default(), kv_backend, Some(selector), None, None).await -} - pub async fn mock( opts: MetasrvOptions, kv_backend: KvBackendRef, diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index 40df9401cb..3b27d33f22 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub(crate) mod close_downgraded_region; pub(crate) mod downgrade_leader_region; pub(crate) mod manager; pub(crate) mod migration_abort; @@ -43,6 +44,7 @@ use common_procedure::error::{ Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu, }; use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey}; +use common_telemetry::info; use manager::RegionMigrationProcedureGuard; pub use manager::{ RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker, @@ -91,7 +93,9 @@ impl PersistentContext { let lock_key = vec![ CatalogLock::Read(&self.catalog).into(), SchemaLock::read(&self.catalog, &self.schema).into(), - TableLock::Read(region_id.table_id()).into(), + // The optimistic updating of table route is not working very well, + // so we need to use the write lock here. 
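            // Presumably this also serializes the migration with any other procedure that
            // locks the same table (including concurrent migrations of sibling regions),
            // trading some parallelism for a consistent view of the table route.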
+ TableLock::Write(region_id.table_id()).into(), RegionLock::Write(region_id).into(), ]; @@ -253,7 +257,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get TableRoute: {table_id}"), })? .context(error::TableRouteNotFoundSnafu { table_id })?; @@ -317,7 +321,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get TableInfo: {table_id}"), })? .context(error::TableInfoNotFoundSnafu { table_id })?; @@ -350,7 +354,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"), })? .context(error::DatanodeTableNotFoundSnafu { @@ -364,12 +368,6 @@ impl Context { Ok(datanode_value.as_ref().unwrap()) } - /// Removes the `table_info` of [VolatileContext], returns true if any. - pub fn remove_table_info_value(&mut self) -> bool { - let value = self.volatile_ctx.table_info.take(); - value.is_some() - } - /// Returns the [RegionId]. pub fn region_id(&self) -> RegionId { self.persistent_ctx.region_id @@ -474,6 +472,48 @@ impl RegionMigrationProcedure { _guard: guard, }) } + + async fn rollback_inner(&mut self) -> Result<()> { + let _timer = METRIC_META_REGION_MIGRATION_EXECUTE + .with_label_values(&["rollback"]) + .start_timer(); + + let table_id = self.context.region_id().table_id(); + let region_id = self.context.region_id(); + self.context.remove_table_route_value(); + let table_metadata_manager = self.context.table_metadata_manager.clone(); + let table_route = self.context.get_table_route_value().await?; + + // Safety: It must be a physical table route. 
+ let downgraded = table_route + .region_routes() + .unwrap() + .iter() + .filter(|route| route.region.id == region_id) + .any(|route| route.is_leader_downgrading()); + + if downgraded { + info!("Rollbacking downgraded region leader table route, region: {region_id}"); + table_metadata_manager + .update_leader_region_status(table_id, table_route, |route| { + if route.region.id == region_id { + Some(None) + } else { + None + } + }) + .await + .context(error::TableMetadataManagerSnafu) + .map_err(BoxedError::new) + .with_context(|_| error::RetryLaterWithSourceSnafu { + reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"), + })?; + } + + self.context.register_failure_detectors().await; + + Ok(()) + } } #[async_trait::async_trait] @@ -482,6 +522,16 @@ impl Procedure for RegionMigrationProcedure { Self::TYPE_NAME } + async fn rollback(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<()> { + self.rollback_inner() + .await + .map_err(ProcedureError::external) + } + + fn rollback_supported(&self) -> bool { + true + } + async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult { let state = &mut self.state; @@ -707,6 +757,12 @@ mod tests { Assertion::simple(assert_update_metadata_upgrade, assert_no_persist), ), // UpdateMetadata::Upgrade + Step::next( + "Should be the close downgraded region", + None, + Assertion::simple(assert_close_downgraded_region, assert_no_persist), + ), + // CloseDowngradedRegion Step::next( "Should be the region migration end", None, @@ -1077,6 +1133,12 @@ mod tests { Assertion::simple(assert_update_metadata_upgrade, assert_no_persist), ), // UpdateMetadata::Upgrade + Step::next( + "Should be the close downgraded region", + None, + Assertion::simple(assert_close_downgraded_region, assert_no_persist), + ), + // CloseDowngradedRegion Step::next( "Should be the region migration end", None, diff --git a/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs new file mode 100644 index 0000000000..9113607681 --- /dev/null +++ b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs @@ -0,0 +1,138 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
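Recapping the rollback path added to region_migration.rs above: the table route is only rewritten when some route of the migrating region is still flagged as leader-downgrading. A condensed sketch of that predicate, with the type and method names taken from the hunk (an illustration, not the crate's exact API):

    fn any_leader_downgrading(routes: &[RegionRoute], region_id: RegionId) -> bool {
        routes
            .iter()
            .filter(|route| route.region.id == region_id)
            .any(|route| route.is_leader_downgrading())
    }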
+ +use std::any::Any; +use std::time::Duration; + +use api::v1::meta::MailboxMessage; +use common_meta::distributed_time_constants::MAILBOX_RTT_SECS; +use common_meta::instruction::{Instruction, InstructionReply, SimpleReply}; +use common_meta::key::datanode_table::RegionInfo; +use common_meta::RegionIdent; +use common_procedure::Status; +use common_telemetry::{info, warn}; +use serde::{Deserialize, Serialize}; +use snafu::ResultExt; + +use crate::error::{self, Result}; +use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::migration_end::RegionMigrationEnd; +use crate::procedure::region_migration::{Context, State}; +use crate::service::mailbox::Channel; + +const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(MAILBOX_RTT_SECS); + +#[derive(Debug, Serialize, Deserialize)] +pub struct CloseDowngradedRegion; + +#[async_trait::async_trait] +#[typetag::serde] +impl State for CloseDowngradedRegion { + async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { + if let Err(err) = self.close_downgraded_leader_region(ctx).await { + let downgrade_leader_datanode = &ctx.persistent_ctx.from_peer; + let region_id = ctx.region_id(); + warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode); + } + + Ok((Box::new(RegionMigrationEnd), Status::done())) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl CloseDowngradedRegion { + /// Builds close region instruction. + /// + /// Abort(non-retry): + /// - Datanode Table is not found. + async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result { + let pc = &ctx.persistent_ctx; + let downgrade_leader_datanode_id = pc.from_peer.id; + let cluster_id = pc.cluster_id; + let table_id = pc.region_id.table_id(); + let region_number = pc.region_id.region_number(); + let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?; + + let RegionInfo { engine, .. } = datanode_table_value.region_info.clone(); + + Ok(Instruction::CloseRegion(RegionIdent { + cluster_id, + datanode_id: downgrade_leader_datanode_id, + table_id, + region_number, + engine, + })) + } + + /// Closes the downgraded leader region. + async fn close_downgraded_leader_region(&self, ctx: &mut Context) -> Result<()> { + let close_instruction = self.build_close_region_instruction(ctx).await?; + let region_id = ctx.region_id(); + let pc = &ctx.persistent_ctx; + let downgrade_leader_datanode = &pc.from_peer; + let msg = MailboxMessage::json_message( + &format!("Close downgraded region: {}", region_id), + &format!("Meta@{}", ctx.server_addr()), + &format!( + "Datanode-{}@{}", + downgrade_leader_datanode.id, downgrade_leader_datanode.addr + ), + common_time::util::current_time_millis(), + &close_instruction, + ) + .with_context(|_| error::SerializeToJsonSnafu { + input: close_instruction.to_string(), + })?; + + let ch = Channel::Datanode(downgrade_leader_datanode.id); + let receiver = ctx + .mailbox + .send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT) + .await?; + + match receiver.await? 
{ + Ok(msg) => { + let reply = HeartbeatMailbox::json_reply(&msg)?; + info!( + "Received close downgraded leade region reply: {:?}, region: {}", + reply, region_id + ); + let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else { + return error::UnexpectedInstructionReplySnafu { + mailbox_message: msg.to_string(), + reason: "expect close region reply", + } + .fail(); + }; + + if result { + Ok(()) + } else { + error::UnexpectedSnafu { + violated: format!( + "Failed to close downgraded leader region: {region_id} on datanode {:?}, error: {error:?}", + downgrade_leader_datanode, + ), + } + .fail() + } + } + + Err(e) => Err(e), + } + } +} diff --git a/src/meta-srv/src/procedure/region_migration/migration_start.rs b/src/meta-srv/src/procedure/region_migration/migration_start.rs index 3f81033410..4c097631d3 100644 --- a/src/meta-srv/src/procedure/region_migration/migration_start.rs +++ b/src/meta-srv/src/procedure/region_migration/migration_start.rs @@ -21,11 +21,11 @@ use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; use store_api::storage::RegionId; -use super::migration_abort::RegionMigrationAbort; -use super::migration_end::RegionMigrationEnd; -use super::open_candidate_region::OpenCandidateRegion; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; +use crate::procedure::region_migration::migration_end::RegionMigrationEnd; +use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{Context, State}; /// The behaviors: diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs index 22b64b0142..6a96540b82 100644 --- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs @@ -25,9 +25,9 @@ use common_telemetry::info; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{Context, State}; use crate::service::mailbox::Channel; @@ -145,7 +145,10 @@ impl OpenCandidateRegion { match receiver.await? 
{ Ok(msg) => { let reply = HeartbeatMailbox::json_reply(&msg)?; - info!("Received open region reply: {:?}", reply); + info!( + "Received open region reply: {:?}, region: {}", + reply, region_id + ); let InstructionReply::OpenRegion(SimpleReply { result, error }) = reply else { return error::UnexpectedInstructionReplySnafu { mailbox_message: msg.to_string(), diff --git a/src/meta-srv/src/procedure/region_migration/test_util.rs b/src/meta-srv/src/procedure/region_migration/test_util.rs index 2058782396..2fe55edcab 100644 --- a/src/meta-srv/src/procedure/region_migration/test_util.rs +++ b/src/meta-srv/src/procedure/region_migration/test_util.rs @@ -44,19 +44,21 @@ use store_api::storage::RegionId; use table::metadata::RawTableInfo; use tokio::sync::mpsc::{Receiver, Sender}; -use super::manager::RegionMigrationProcedureTracker; -use super::migration_abort::RegionMigrationAbort; -use super::upgrade_candidate_region::UpgradeCandidateRegion; -use super::{Context, ContextFactory, DefaultContextFactory, State, VolatileContext}; use crate::cache_invalidator::MetasrvCacheInvalidator; use crate::error::{self, Error, Result}; use crate::handler::{HeartbeatMailbox, Pusher, Pushers}; use crate::metasrv::MetasrvInfo; +use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion; +use crate::procedure::region_migration::manager::RegionMigrationProcedureTracker; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; use crate::procedure::region_migration::migration_end::RegionMigrationEnd; use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion; use crate::procedure::region_migration::update_metadata::UpdateMetadata; -use crate::procedure::region_migration::PersistentContext; +use crate::procedure::region_migration::upgrade_candidate_region::UpgradeCandidateRegion; +use crate::procedure::region_migration::{ + Context, ContextFactory, DefaultContextFactory, PersistentContext, State, VolatileContext, +}; use crate::service::mailbox::{Channel, MailboxRef}; pub type MockHeartbeatReceiver = Receiver>; @@ -569,6 +571,14 @@ pub(crate) fn assert_region_migration_end(next: &dyn State) { let _ = next.as_any().downcast_ref::().unwrap(); } +/// Asserts the [State] should be [CloseDowngradedRegion]. +pub(crate) fn assert_close_downgraded_region(next: &dyn State) { + let _ = next + .as_any() + .downcast_ref::() + .unwrap(); +} + /// Asserts the [State] should be [RegionMigrationAbort]. 
pub(crate) fn assert_region_migration_abort(next: &dyn State) { let _ = next diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata.rs b/src/meta-srv/src/procedure/region_migration/update_metadata.rs index 180cf31fe1..858669ea21 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata.rs @@ -22,10 +22,10 @@ use common_procedure::Status; use common_telemetry::warn; use serde::{Deserialize, Serialize}; -use super::migration_abort::RegionMigrationAbort; -use super::migration_end::RegionMigrationEnd; use crate::error::Result; +use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; use crate::procedure::region_migration::{Context, State}; #[derive(Debug, Serialize, Deserialize)] @@ -58,7 +58,7 @@ impl State for UpdateMetadata { if let Err(err) = ctx.invalidate_table_cache().await { warn!("Failed to broadcast the invalidate table cache message during the upgrade candidate, error: {err:?}"); }; - Ok((Box::new(RegionMigrationEnd), Status::done())) + Ok((Box::new(CloseDowngradedRegion), Status::executing(false))) } UpdateMetadata::Rollback => { self.rollback_downgraded_region(ctx).await?; diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs index b710a0e1f3..c180456bd4 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs @@ -195,7 +195,7 @@ mod tests { use store_api::storage::RegionId; use crate::error::Error; - use crate::procedure::region_migration::migration_end::RegionMigrationEnd; + use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::test_util::{self, TestingEnv}; use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{ContextFactory, PersistentContext, State}; @@ -443,7 +443,7 @@ mod tests { } #[tokio::test] - async fn test_next_migration_end_state() { + async fn test_next_close_downgraded_region_state() { let mut state = Box::new(UpdateMetadata::Upgrade); let env = TestingEnv::new(); let persistent_context = new_persistent_context(); @@ -471,7 +471,10 @@ mod tests { let (next, _) = state.next(&mut ctx).await.unwrap(); - let _ = next.as_any().downcast_ref::().unwrap(); + let _ = next + .as_any() + .downcast_ref::() + .unwrap(); let table_route = table_metadata_manager .table_route_manager() diff --git a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs index 49100e92f3..fa989274b4 100644 --- a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs @@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize}; use snafu::{ensure, OptionExt, ResultExt}; use tokio::time::{sleep, Instant}; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use 
crate::procedure::region_migration::{Context, State}; use crate::service::mailbox::Channel; @@ -155,7 +155,7 @@ impl UpgradeCandidateRegion { exists, error::UnexpectedSnafu { violated: format!( - "Expected region {} doesn't exist on datanode {:?}", + "Candidate region {} doesn't exist on datanode {:?}", region_id, candidate ) } diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 86b64ddfae..15b9470113 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -210,7 +210,6 @@ impl RegionEngine for MetricEngine { for x in [ utils::to_metadata_region_id(region_id), utils::to_data_region_id(region_id), - region_id, ] { if let Err(e) = self.inner.mito.set_region_role(x, role) && e.status_code() != StatusCode::RegionNotFound @@ -226,6 +225,13 @@ impl RegionEngine for MetricEngine { region_id: RegionId, region_role_state: SettableRegionRoleState, ) -> std::result::Result { + self.inner + .mito + .set_region_role_state_gracefully( + utils::to_metadata_region_id(region_id), + region_role_state, + ) + .await?; self.inner .mito .set_region_role_state_gracefully(region_id, region_role_state) diff --git a/src/metric-engine/src/engine/catchup.rs b/src/metric-engine/src/engine/catchup.rs index 4b1268c049..783e1f009c 100644 --- a/src/metric-engine/src/engine/catchup.rs +++ b/src/metric-engine/src/engine/catchup.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_telemetry::debug; use snafu::ResultExt; use store_api::region_engine::RegionEngine; use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest}; @@ -35,6 +36,7 @@ impl MetricEngineInner { } let metadata_region_id = utils::to_metadata_region_id(region_id); // TODO(weny): improve the catchup, we can read the wal entries only once. 
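        // Per the TODO above, catchup currently replays WAL entries twice for one metric
        // region: once for the metadata region handled below and once more for the data
        // region; sharing a single read is the suggested improvement.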
+ debug!("Catchup metadata region {metadata_region_id}"); self.mito .handle_request( metadata_region_id, @@ -48,6 +50,7 @@ impl MetricEngineInner { .context(MitoCatchupOperationSnafu)?; let data_region_id = utils::to_data_region_id(region_id); + debug!("Catchup data region {data_region_id}"); self.mito .handle_request( data_region_id, diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index c5f7a2b4a3..d0f8cf5028 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -313,12 +313,12 @@ mod test { let region_dir = "test_metric_region"; // assert metadata region's dir let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR); - let exist = object_store.is_exist(&metadata_region_dir).await.unwrap(); + let exist = object_store.exists(&metadata_region_dir).await.unwrap(); assert!(exist); // assert data region's dir let data_region_dir = join_dir(region_dir, DATA_REGION_SUBDIR); - let exist = object_store.is_exist(&data_region_dir).await.unwrap(); + let exist = object_store.exists(&data_region_dir).await.unwrap(); assert!(exist); // check mito engine diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index eedf6ae636..56d480df5a 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -17,6 +17,7 @@ aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" +bytemuck.workspace = true bytes.workspace = true common-base.workspace = true common-config.workspace = true @@ -76,7 +77,6 @@ uuid.workspace = true [dev-dependencies] common-function.workspace = true common-meta = { workspace = true, features = ["testing"] } -common-procedure-test.workspace = true common-test-util.workspace = true criterion = "0.4" dotenv.workspace = true diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 7d977a328c..03cf913624 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -32,6 +32,7 @@ use moka::notification::RemovalCause; use moka::sync::Cache; use parquet::column::page::Page; use parquet::file::metadata::ParquetMetaData; +use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef}; use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector}; use crate::cache::cache_size::parquet_meta_size; @@ -68,6 +69,8 @@ pub struct CacheManager { write_cache: Option, /// Cache for inverted index. index_cache: Option, + /// Puffin metadata cache. + puffin_metadata_cache: Option, /// Cache for time series selectors. selector_result_cache: Option, } @@ -217,6 +220,10 @@ impl CacheManager { pub(crate) fn index_cache(&self) -> Option<&InvertedIndexCacheRef> { self.index_cache.as_ref() } + + pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> { + self.puffin_metadata_cache.as_ref() + } } /// Increases selector cache miss metrics. @@ -237,6 +244,8 @@ pub struct CacheManagerBuilder { page_cache_size: u64, index_metadata_size: u64, index_content_size: u64, + index_content_page_size: u64, + puffin_metadata_size: u64, write_cache: Option, selector_result_cache_size: u64, } @@ -278,6 +287,18 @@ impl CacheManagerBuilder { self } + /// Sets page size for index content. + pub fn index_content_page_size(mut self, bytes: u64) -> Self { + self.index_content_page_size = bytes; + self + } + + /// Sets cache size for puffin metadata. + pub fn puffin_metadata_size(mut self, bytes: u64) -> Self { + self.puffin_metadata_size = bytes; + self + } + /// Sets selector result cache size. 
pub fn selector_result_cache_size(mut self, bytes: u64) -> Self { self.selector_result_cache_size = bytes; @@ -338,8 +359,13 @@ impl CacheManagerBuilder { }) .build() }); - let inverted_index_cache = - InvertedIndexCache::new(self.index_metadata_size, self.index_content_size); + let inverted_index_cache = InvertedIndexCache::new( + self.index_metadata_size, + self.index_content_size, + self.index_content_page_size, + ); + let puffin_metadata_cache = + PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES); let selector_result_cache = (self.selector_result_cache_size != 0).then(|| { Cache::builder() .max_capacity(self.selector_result_cache_size) @@ -361,6 +387,7 @@ impl CacheManagerBuilder { page_cache, write_cache: self.write_cache, index_cache: Some(Arc::new(inverted_index_cache)), + puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)), selector_result_cache, } } diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index 9e5742ca04..eb112530ca 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -286,7 +286,7 @@ impl FileCache { } async fn get_reader(&self, file_path: &str) -> object_store::Result> { - if self.local_store.is_exist(file_path).await? { + if self.local_store.exists(file_path).await? { Ok(Some(self.local_store.reader(file_path).await?)) } else { Ok(None) @@ -480,7 +480,7 @@ mod tests { cache.memory_index.run_pending_tasks().await; // The file also not exists. - assert!(!local_store.is_exist(&file_path).await.unwrap()); + assert!(!local_store.exists(&file_path).await.unwrap()); assert_eq!(0, cache.memory_index.weighted_size()); } diff --git a/src/mito2/src/cache/index.rs b/src/mito2/src/cache/index.rs index 4e6e4deee2..de39ea3784 100644 --- a/src/mito2/src/cache/index.rs +++ b/src/mito2/src/cache/index.rs @@ -12,14 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use api::v1::index::InvertedIndexMetas; use async_trait::async_trait; +use bytes::Bytes; use common_base::BitVec; use index::inverted_index::error::DecodeFstSnafu; use index::inverted_index::format::reader::InvertedIndexReader; use index::inverted_index::FstMap; +use object_store::Buffer; use prost::Message; use snafu::ResultExt; @@ -34,14 +37,16 @@ const INDEX_CONTENT_TYPE: &str = "index_content"; /// Inverted index blob reader with cache. 
pub struct CachedInvertedIndexBlobReader { file_id: FileId, + file_size: u64, inner: R, cache: InvertedIndexCacheRef, } impl CachedInvertedIndexBlobReader { - pub fn new(file_id: FileId, inner: R, cache: InvertedIndexCacheRef) -> Self { + pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self { Self { file_id, + file_size, inner, cache, } @@ -59,43 +64,71 @@ where offset: u64, size: u32, ) -> index::inverted_index::error::Result> { - let range = offset as usize..(offset + size as u64) as usize; - if let Some(cached) = self.cache.get_index(IndexKey { - file_id: self.file_id, - }) { - CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); - Ok(cached[range].to_vec()) - } else { - let mut all_data = Vec::with_capacity(1024 * 1024); - self.inner.read_all(&mut all_data).await?; - let result = all_data[range].to_vec(); - self.cache.put_index( - IndexKey { - file_id: self.file_id, - }, - Arc::new(all_data), - ); - CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); - Ok(result) + let keys = + IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size); + // Size is 0, return empty data. + if keys.is_empty() { + return Ok(Vec::new()); } + let mut data = Vec::with_capacity(keys.len()); + data.resize(keys.len(), Bytes::new()); + let mut cache_miss_range = vec![]; + let mut cache_miss_idx = vec![]; + let last_index = keys.len() - 1; + // TODO: Avoid copy as much as possible. + for (i, index) in keys.iter().enumerate() { + match self.cache.get_index(index) { + Some(page) => { + CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); + data[i] = page; + } + None => { + CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); + let base_offset = index.page_id * self.cache.page_size; + let pruned_size = if i == last_index { + prune_size(&keys, self.file_size, self.cache.page_size) + } else { + self.cache.page_size + }; + cache_miss_range.push(base_offset..base_offset + pruned_size); + cache_miss_idx.push(i); + } + } + } + if !cache_miss_range.is_empty() { + let pages = self.inner.read_vec(&cache_miss_range).await?; + for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) { + let key = keys[i].clone(); + data[i] = page.clone(); + self.cache.put_index(key, page.clone()); + } + } + let buffer = Buffer::from_iter(data.into_iter()); + Ok(buffer + .slice(IndexDataPageKey::calculate_range( + offset, + size, + self.cache.page_size, + )) + .to_vec()) } } #[async_trait] impl InvertedIndexReader for CachedInvertedIndexBlobReader { - async fn read_all( - &mut self, - dest: &mut Vec, - ) -> index::inverted_index::error::Result { - self.inner.read_all(dest).await - } - - async fn seek_read( + async fn range_read( &mut self, offset: u64, size: u32, ) -> index::inverted_index::error::Result> { - self.inner.seek_read(offset, size).await + self.inner.range_read(offset, size).await + } + + async fn read_vec( + &mut self, + ranges: &[Range], + ) -> index::inverted_index::error::Result> { + self.inner.read_vec(ranges).await } async fn metadata(&mut self) -> index::inverted_index::error::Result> { @@ -130,22 +163,69 @@ impl InvertedIndexReader for CachedInvertedIndexBlobRead } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct IndexKey { +pub struct IndexMetadataKey { file_id: FileId, } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IndexDataPageKey { + file_id: FileId, + page_id: u64, +} + +impl IndexDataPageKey { + /// Converts an offset to a page ID based on the page size. 
+ fn calculate_page_id(offset: u64, page_size: u64) -> u64 { + offset / page_size + } + + /// Calculates the total number of pages that a given size spans, starting from a specific offset. + fn calculate_page_count(offset: u64, size: u32, page_size: u64) -> u32 { + let start_page = Self::calculate_page_id(offset, page_size); + let end_page = Self::calculate_page_id(offset + (size as u64) - 1, page_size); + (end_page + 1 - start_page) as u32 + } + + /// Calculates the byte range for data retrieval based on the specified offset and size. + /// + /// This function determines the starting and ending byte positions required for reading data. + /// For example, with an offset of 5000 and a size of 5000, using a PAGE_SIZE of 4096, + /// the resulting byte range will be 904..5904. This indicates that: + /// - The reader will first access fixed-size pages [4096, 8192) and [8192, 12288). + /// - To read the range [5000..10000), it only needs to fetch bytes within the range [904, 5904) across two pages. + fn calculate_range(offset: u64, size: u32, page_size: u64) -> Range { + let start = (offset % page_size) as usize; + let end = start + size as usize; + start..end + } + + /// Generates a vector of IndexKey instances for the pages that a given offset and size span. + fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec { + let start_page = Self::calculate_page_id(offset, page_size); + let total_pages = Self::calculate_page_count(offset, size, page_size); + (0..total_pages) + .map(|i| Self { + file_id, + page_id: start_page + i as u64, + }) + .collect() + } +} + pub type InvertedIndexCacheRef = Arc; pub struct InvertedIndexCache { /// Cache for inverted index metadata - index_metadata: moka::sync::Cache>, + index_metadata: moka::sync::Cache>, /// Cache for inverted index content. - index: moka::sync::Cache>>, + index: moka::sync::Cache, + // Page size for index content. + page_size: u64, } impl InvertedIndexCache { /// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`. - pub fn new(index_metadata_cap: u64, index_content_cap: u64) -> Self { + pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self { common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}"); let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap) .name("inverted_index_metadata") @@ -170,29 +250,29 @@ impl InvertedIndexCache { Self { index_metadata, index: index_cache, + page_size, } } } impl InvertedIndexCache { pub fn get_index_metadata(&self, file_id: FileId) -> Option> { - self.index_metadata.get(&IndexKey { file_id }) + self.index_metadata.get(&IndexMetadataKey { file_id }) } pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc) { - let key = IndexKey { file_id }; + let key = IndexMetadataKey { file_id }; CACHE_BYTES .with_label_values(&[INDEX_METADATA_TYPE]) .add(index_metadata_weight(&key, &metadata).into()); self.index_metadata.insert(key, metadata) } - // todo(hl): align index file content to pages with size like 4096 bytes. 
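A worked example of the IndexDataPageKey helpers above, written as test-style assertions. They assume access to the private helpers and a 4096-byte page, so treat them as an illustration rather than a drop-in test; the offset and size values are the ones from the calculate_range doc comment.

    let page_size = 4096u64;
    // Reading offset 5000, size 5000 touches page ids 1 and 2 ([4096, 8192) and [8192, 12288)).
    assert_eq!(IndexDataPageKey::calculate_page_id(5000, page_size), 1);
    assert_eq!(IndexDataPageKey::calculate_page_count(5000, 5000, page_size), 2);
    // Within the two concatenated pages, the requested bytes live at [904, 5904).
    assert_eq!(IndexDataPageKey::calculate_range(5000, 5000, page_size), 904..5904);
    // Both generated keys share the file id and differ only in page_id (1 and 2).
    let keys = IndexDataPageKey::generate_page_keys(FileId::random(), 5000, 5000, page_size);
    assert_eq!(keys.len(), 2);

For a 10_000-byte file, the read of the last spanned page (id 2) is additionally clamped by prune_size to min(4096, 10_000 - 8192) = 1808 bytes.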
- pub fn get_index(&self, key: IndexKey) -> Option>> { - self.index.get(&key) + pub fn get_index(&self, key: &IndexDataPageKey) -> Option { + self.index.get(key) } - pub fn put_index(&self, key: IndexKey, value: Arc>) { + pub fn put_index(&self, key: IndexDataPageKey, value: Bytes) { CACHE_BYTES .with_label_values(&[INDEX_CONTENT_TYPE]) .add(index_content_weight(&key, &value).into()); @@ -201,11 +281,229 @@ impl InvertedIndexCache { } /// Calculates weight for index metadata. -fn index_metadata_weight(k: &IndexKey, v: &Arc) -> u32 { +fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc) -> u32 { (k.file_id.as_bytes().len() + v.encoded_len()) as u32 } /// Calculates weight for index content. -fn index_content_weight(k: &IndexKey, v: &Arc>) -> u32 { +fn index_content_weight(k: &IndexDataPageKey, v: &Bytes) -> u32 { (k.file_id.as_bytes().len() + v.len()) as u32 } + +/// Prunes the size of the last page based on the indexes. +/// We have following cases: +/// 1. The rest file size is less than the page size, read to the end of the file. +/// 2. Otherwise, read the page size. +fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 { + let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0); + page_size.min(file_size - last_page_start) +} + +#[cfg(test)] +mod test { + use std::num::NonZeroUsize; + + use common_base::BitVec; + use futures::stream; + use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader}; + use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter}; + use index::inverted_index::Bytes; + use prometheus::register_int_counter_vec; + use rand::{Rng, RngCore}; + + use super::*; + use crate::sst::index::store::InstrumentedStore; + use crate::test_util::TestEnv; + + // Repeat times for following little fuzz tests. + const FUZZ_REPEAT_TIMES: usize = 100; + + // Fuzz test for index data page key + #[test] + fn fuzz_index_calculation() { + // randomly generate a large u8 array + let mut rng = rand::thread_rng(); + let mut data = vec![0u8; 1024 * 1024]; + rng.fill_bytes(&mut data); + let file_id = FileId::random(); + + for _ in 0..FUZZ_REPEAT_TIMES { + let offset = rng.gen_range(0..data.len() as u64); + let size = rng.gen_range(0..data.len() as u32 - offset as u32); + let page_size: usize = rng.gen_range(1..1024); + + let indexes = + IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64); + let page_num = indexes.len(); + let mut read = Vec::with_capacity(size as usize); + for key in indexes.into_iter() { + let start = key.page_id as usize * page_size; + let page = if start + page_size < data.len() { + &data[start..start + page_size] + } else { + &data[start..] 
+ }; + read.extend_from_slice(page); + } + let expected_range = offset as usize..(offset + size as u64 as u64) as usize; + let read = + read[IndexDataPageKey::calculate_range(offset, size, page_size as u64)].to_vec(); + if read != data.get(expected_range).unwrap() { + panic!( + "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nrange: {:?}, page num: {}", + offset, size, page_size, read.len(), size as usize, + IndexDataPageKey::calculate_range(offset, size, page_size as u64), + page_num + ); + } + } + } + + fn unpack(fst_value: u64) -> [u32; 2] { + bytemuck::cast::(fst_value) + } + + async fn create_inverted_index_blob() -> Vec { + let mut blob = Vec::new(); + let mut writer = InvertedIndexBlobWriter::new(&mut blob); + writer + .add_index( + "tag0".to_string(), + BitVec::from_slice(&[0b0000_0001, 0b0000_0000]), + Box::new(stream::iter(vec![ + Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))), + Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))), + Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))), + ])), + ) + .await + .unwrap(); + writer + .add_index( + "tag1".to_string(), + BitVec::from_slice(&[0b0000_0001, 0b0000_0000]), + Box::new(stream::iter(vec![ + Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))), + Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))), + Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))), + ])), + ) + .await + .unwrap(); + writer + .finish(8, NonZeroUsize::new(1).unwrap()) + .await + .unwrap(); + + blob + } + + #[tokio::test] + async fn test_inverted_index_cache() { + let blob = create_inverted_index_blob().await; + + // Init a test range reader in local fs. + let mut env = TestEnv::new(); + let file_size = blob.len() as u64; + let store = env.init_object_store_manager(); + let temp_path = "data"; + store.write(temp_path, blob).await.unwrap(); + let store = InstrumentedStore::new(store); + let metric = + register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap(); + let counter = metric.with_label_values(&["test"]); + let range_reader = store + .range_reader("data", &counter, &counter) + .await + .unwrap(); + + let reader = InvertedIndexBlobReader::new(range_reader); + let mut cached_reader = CachedInvertedIndexBlobReader::new( + FileId::random(), + file_size, + reader, + Arc::new(InvertedIndexCache::new(8192, 8192, 50)), + ); + let metadata = cached_reader.metadata().await.unwrap(); + assert_eq!(metadata.total_row_count, 8); + assert_eq!(metadata.segment_row_count, 1); + assert_eq!(metadata.metas.len(), 2); + // tag0 + let tag0 = metadata.metas.get("tag0").unwrap(); + let stats0 = tag0.stats.as_ref().unwrap(); + assert_eq!(stats0.distinct_count, 3); + assert_eq!(stats0.null_count, 1); + assert_eq!(stats0.min_value, Bytes::from("a")); + assert_eq!(stats0.max_value, Bytes::from("c")); + let fst0 = cached_reader + .fst( + tag0.base_offset + tag0.relative_fst_offset as u64, + tag0.fst_size, + ) + .await + .unwrap(); + assert_eq!(fst0.len(), 3); + let [offset, size] = unpack(fst0.get(b"a").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + let [offset, size] = unpack(fst0.get(b"b").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); + let [offset, size] = unpack(fst0.get(b"c").unwrap()); + let bitmap = cached_reader + 
.bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + + // tag1 + let tag1 = metadata.metas.get("tag1").unwrap(); + let stats1 = tag1.stats.as_ref().unwrap(); + assert_eq!(stats1.distinct_count, 3); + assert_eq!(stats1.null_count, 1); + assert_eq!(stats1.min_value, Bytes::from("x")); + assert_eq!(stats1.max_value, Bytes::from("z")); + let fst1 = cached_reader + .fst( + tag1.base_offset + tag1.relative_fst_offset as u64, + tag1.fst_size, + ) + .await + .unwrap(); + assert_eq!(fst1.len(), 3); + let [offset, size] = unpack(fst1.get(b"x").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + let [offset, size] = unpack(fst1.get(b"y").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); + let [offset, size] = unpack(fst1.get(b"z").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + + // fuzz test + let mut rng = rand::thread_rng(); + for _ in 0..FUZZ_REPEAT_TIMES { + let offset = rng.gen_range(0..file_size); + let size = rng.gen_range(0..file_size as u32 - offset as u32); + let expected = cached_reader.range_read(offset, size).await.unwrap(); + let read = cached_reader.get_or_load(offset, size).await.unwrap(); + assert_eq!(read, expected); + } + } +} diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 93ccf986ce..a08f8b7834 100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -44,7 +44,7 @@ use tokio::sync::mpsc::{self, Sender}; use crate::access_layer::AccessLayerRef; use crate::cache::CacheManagerRef; -use crate::compaction::compactor::{CompactionRegion, DefaultCompactor}; +use crate::compaction::compactor::{CompactionRegion, CompactionVersion, DefaultCompactor}; use crate::compaction::picker::{new_picker, CompactionTask}; use crate::compaction::task::CompactionTaskImpl; use crate::config::MitoConfig; @@ -59,7 +59,7 @@ use crate::read::scan_region::ScanInput; use crate::read::seq_scan::SeqScan; use crate::read::BoxedBatchReader; use crate::region::options::MergeMode; -use crate::region::version::{VersionControlRef, VersionRef}; +use crate::region::version::VersionControlRef; use crate::region::ManifestContextRef; use crate::request::{OptionOutputTx, OutputTx, WorkerRequest}; use crate::schedule::remote_job_scheduler::{ @@ -73,7 +73,7 @@ use crate::worker::WorkerListener; /// Region compaction request. pub struct CompactionRequest { pub(crate) engine_config: Arc, - pub(crate) current_version: VersionRef, + pub(crate) current_version: CompactionVersion, pub(crate) access_layer: AccessLayerRef, /// Sender to send notification to the region worker. 
pub(crate) request_sender: mpsc::Sender, @@ -522,7 +522,7 @@ impl CompactionStatus { listener: WorkerListener, schema_metadata_manager: SchemaMetadataManagerRef, ) -> CompactionRequest { - let current_version = self.version_control.current().version; + let current_version = CompactionVersion::from(self.version_control.current().version); let start_time = Instant::now(); let mut req = CompactionRequest { engine_config, diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs index 91ab34c961..e2499140fd 100644 --- a/src/mito2/src/compaction/compactor.rs +++ b/src/mito2/src/compaction/compactor.rs @@ -35,12 +35,10 @@ use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Res use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList}; use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions}; use crate::manifest::storage::manifest_compress_type; -use crate::memtable::time_partition::TimePartitions; -use crate::memtable::MemtableBuilderProvider; use crate::read::Source; use crate::region::opener::new_manifest_dir; use crate::region::options::RegionOptions; -use crate::region::version::{VersionBuilder, VersionRef}; +use crate::region::version::VersionRef; use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState}; use crate::schedule::scheduler::LocalScheduler; use crate::sst::file::{FileMeta, IndexType}; @@ -48,6 +46,34 @@ use crate::sst::file_purger::LocalFilePurger; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::puffin_manager::PuffinManagerFactory; use crate::sst::parquet::WriteOptions; +use crate::sst::version::{SstVersion, SstVersionRef}; + +/// Region version for compaction that does not hold memtables. +#[derive(Clone)] +pub struct CompactionVersion { + /// Metadata of the region. + /// + /// Altering metadata isn't frequent, storing metadata in Arc to allow sharing + /// metadata and reuse metadata when creating a new `Version`. + pub(crate) metadata: RegionMetadataRef, + /// Options of the region. + pub(crate) options: RegionOptions, + /// SSTs of the region. + pub(crate) ssts: SstVersionRef, + /// Inferred compaction time window. + pub(crate) compaction_time_window: Option, +} + +impl From for CompactionVersion { + fn from(value: VersionRef) -> Self { + Self { + metadata: value.metadata.clone(), + options: value.options.clone(), + ssts: value.ssts.clone(), + compaction_time_window: value.compaction_time_window, + } + } +} /// CompactionRegion represents a region that needs to be compacted. /// It's the subset of MitoRegion. @@ -62,7 +88,7 @@ pub struct CompactionRegion { pub(crate) cache_manager: CacheManagerRef, pub(crate) access_layer: AccessLayerRef, pub(crate) manifest_ctx: Arc, - pub(crate) current_version: VersionRef, + pub(crate) current_version: CompactionVersion, pub(crate) file_purger: Option>, pub(crate) ttl: Option, } @@ -147,30 +173,14 @@ pub async fn open_compaction_region( }; let current_version = { - let memtable_builder = MemtableBuilderProvider::new(None, Arc::new(mito_config.clone())) - .builder_for_options( - req.region_options.memtable.as_ref(), - req.region_options.need_dedup(), - req.region_options.merge_mode(), - ); - - // Initial memtable id is 0. 
- let mutable = Arc::new(TimePartitions::new( - region_metadata.clone(), - memtable_builder.clone(), - 0, - req.region_options.compaction.time_window(), - )); - - let version = VersionBuilder::new(region_metadata.clone(), mutable) - .add_files(file_purger.clone(), manifest.files.values().cloned()) - .flushed_entry_id(manifest.flushed_entry_id) - .flushed_sequence(manifest.flushed_sequence) - .truncated_entry_id(manifest.truncated_entry_id) - .compaction_time_window(manifest.compaction_time_window) - .options(req.region_options.clone()) - .build(); - Arc::new(version) + let mut ssts = SstVersion::new(); + ssts.add_files(file_purger.clone(), manifest.files.values().cloned()); + CompactionVersion { + metadata: region_metadata.clone(), + options: req.region_options.clone(), + ssts: Arc::new(ssts), + compaction_time_window: manifest.compaction_time_window, + } }; let ttl = find_ttl( diff --git a/src/mito2/src/compaction/window.rs b/src/mito2/src/compaction/window.rs index f16b8e4c95..10bdb47297 100644 --- a/src/mito2/src/compaction/window.rs +++ b/src/mito2/src/compaction/window.rs @@ -23,10 +23,9 @@ use common_time::Timestamp; use store_api::storage::RegionId; use crate::compaction::buckets::infer_time_bucket; -use crate::compaction::compactor::CompactionRegion; +use crate::compaction::compactor::{CompactionRegion, CompactionVersion}; use crate::compaction::picker::{Picker, PickerOutput}; use crate::compaction::{get_expired_ssts, CompactionOutput}; -use crate::region::version::VersionRef; use crate::sst::file::{FileHandle, FileId}; /// Compaction picker that splits the time range of all involved files to windows, and merges @@ -48,7 +47,11 @@ impl WindowedCompactionPicker { // use persisted window. If persist window is not present, we check the time window // provided while creating table. If all of those are absent, we infer the window // from files in level0. 
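    // Summarizing the fallback described above, roughly:
    //   configured_window_seconds
    //       .or(persisted_compaction_time_window)
    //       .or(time_window_from_table_options)
    //       .unwrap_or_else(|| infer_time_bucket(level0_files))
    // The exact expression is in the function body below.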
- fn calculate_time_window(&self, region_id: RegionId, current_version: &VersionRef) -> i64 { + fn calculate_time_window( + &self, + region_id: RegionId, + current_version: &CompactionVersion, + ) -> i64 { self.compaction_time_window_seconds .or(current_version .compaction_time_window @@ -67,7 +70,7 @@ impl WindowedCompactionPicker { fn pick_inner( &self, region_id: RegionId, - current_version: &VersionRef, + current_version: &CompactionVersion, current_time: Timestamp, ) -> (Vec, Vec, i64) { let time_window = self.calculate_time_window(region_id, current_version); @@ -205,28 +208,19 @@ mod tests { use common_time::Timestamp; use store_api::storage::RegionId; + use crate::compaction::compactor::CompactionVersion; use crate::compaction::window::{file_time_bucket_span, WindowedCompactionPicker}; - use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder}; - use crate::memtable::time_partition::TimePartitions; - use crate::memtable::version::MemtableVersion; use crate::region::options::RegionOptions; - use crate::region::version::{Version, VersionRef}; use crate::sst::file::{FileId, FileMeta, Level}; use crate::sst::version::SstVersion; use crate::test_util::memtable_util::metadata_for_test; use crate::test_util::NoopFilePurger; - fn build_version(files: &[(FileId, i64, i64, Level)], ttl: Option) -> VersionRef { + fn build_version( + files: &[(FileId, i64, i64, Level)], + ttl: Option, + ) -> CompactionVersion { let metadata = metadata_for_test(); - let memtables = Arc::new(MemtableVersion::new(Arc::new(TimePartitions::new( - metadata.clone(), - Arc::new(PartitionTreeMemtableBuilder::new( - PartitionTreeConfig::default(), - None, - )), - 0, - None, - )))); let file_purger_ref = Arc::new(NoopFilePurger); let mut ssts = SstVersion::new(); @@ -244,14 +238,9 @@ mod tests { }), ); - Arc::new(Version { + CompactionVersion { metadata, - memtables, ssts: Arc::new(ssts), - flushed_entry_id: 0, - flushed_sequence: 0, - truncated_entry_id: None, - compaction_time_window: None, options: RegionOptions { ttl: ttl.map(|t| t.into()), compaction: Default::default(), @@ -262,7 +251,8 @@ mod tests { memtable: None, merge_mode: None, }, - }) + compaction_time_window: None, + } } #[test] diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index 9b113027a4..963089c60a 100644 --- a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -304,6 +304,9 @@ pub struct IndexConfig { /// Write buffer size for creating the index. pub write_buffer_size: ReadableSize, + + /// Cache size for metadata of puffin files. Setting it to 0 to disable the cache. + pub metadata_cache_size: ReadableSize, } impl Default for IndexConfig { @@ -312,6 +315,7 @@ impl Default for IndexConfig { aux_path: String::new(), staging_size: ReadableSize::gb(2), write_buffer_size: ReadableSize::mb(8), + metadata_cache_size: ReadableSize::mb(64), } } } @@ -412,6 +416,8 @@ pub struct InvertedIndexConfig { pub metadata_cache_size: ReadableSize, /// Cache size for inverted index content. Setting it to 0 to disable the cache. pub content_cache_size: ReadableSize, + /// Page size for inverted index content. 
+ pub content_cache_page_size: ReadableSize, } impl InvertedIndexConfig { @@ -437,6 +443,7 @@ impl Default for InvertedIndexConfig { intermediate_path: String::new(), metadata_cache_size: ReadableSize::mb(64), content_cache_size: ReadableSize::mb(128), + content_cache_page_size: ReadableSize::mb(8), }; if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { diff --git a/src/mito2/src/engine/create_test.rs b/src/mito2/src/engine/create_test.rs index 48b04dc86d..4bcc559340 100644 --- a/src/mito2/src/engine/create_test.rs +++ b/src/mito2/src/engine/create_test.rs @@ -192,12 +192,12 @@ async fn test_engine_create_with_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(region_dir) + .exists(region_dir) .await .unwrap()); assert!(!object_store_manager .default_object_store() - .is_exist(region_dir) + .exists(region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/drop_test.rs b/src/mito2/src/engine/drop_test.rs index 7d719f778b..5d0c5afbf0 100644 --- a/src/mito2/src/engine/drop_test.rs +++ b/src/mito2/src/engine/drop_test.rs @@ -71,7 +71,7 @@ async fn test_engine_drop_region() { assert!(!env .get_object_store() .unwrap() - .is_exist(&join_path(®ion_dir, DROPPING_MARKER_FILE)) + .exists(&join_path(®ion_dir, DROPPING_MARKER_FILE)) .await .unwrap()); @@ -93,7 +93,7 @@ async fn test_engine_drop_region() { listener.wait().await; let object_store = env.get_object_store().unwrap(); - assert!(!object_store.is_exist(®ion_dir).await.unwrap()); + assert!(!object_store.exists(®ion_dir).await.unwrap()); } #[tokio::test] @@ -167,13 +167,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(&custom_region_dir) + .exists(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .is_exist(&global_region_dir) + .exists(&global_region_dir) .await .unwrap()); @@ -190,13 +190,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(!object_store_manager .find("Gcs") .unwrap() - .is_exist(&custom_region_dir) + .exists(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .is_exist(&global_region_dir) + .exists(&global_region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/open_test.rs b/src/mito2/src/engine/open_test.rs index 6752bbd04b..a3b51514c2 100644 --- a/src/mito2/src/engine/open_test.rs +++ b/src/mito2/src/engine/open_test.rs @@ -228,13 +228,13 @@ async fn test_engine_region_open_with_custom_store() { let object_store_manager = env.get_object_store_manager().unwrap(); assert!(!object_store_manager .default_object_store() - .is_exist(region.access_layer.region_dir()) + .exists(region.access_layer.region_dir()) .await .unwrap()); assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(region.access_layer.region_dir()) + .exists(region.access_layer.region_dir()) .await .unwrap()); } diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index d5e47d2136..82b86a2155 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -756,13 +756,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to build time range filters for value: {:?}", timestamp))] - BuildTimeRangeFilter { - timestamp: Timestamp, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to open region"))] OpenRegion { #[snafu(implicit)] @@ -893,6 +886,14 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to read file 
metadata"))] + Metadata { + #[snafu(source)] + error: std::io::Error, + #[snafu(implicit)] + location: Location, + }, } pub type Result = std::result::Result; @@ -965,7 +966,8 @@ impl ErrorExt for Error { | CreateDir { .. } | ReadDataPart { .. } | CorruptedEntry { .. } - | BuildEntry { .. } => StatusCode::Internal, + | BuildEntry { .. } + | Metadata { .. } => StatusCode::Internal, OpenRegion { source, .. } => source.status_code(), @@ -1014,7 +1016,6 @@ impl ErrorExt for Error { ChecksumMismatch { .. } => StatusCode::Unexpected, RegionStopped { .. } => StatusCode::RegionNotReady, TimeRangePredicateOverflow { .. } => StatusCode::InvalidArguments, - BuildTimeRangeFilter { .. } => StatusCode::Unexpected, UnsupportedOperation { .. } => StatusCode::Unsupported, RemoteCompaction { .. } => StatusCode::Unexpected, diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs index 81e9efd90a..b522f225f9 100644 --- a/src/mito2/src/flush.rs +++ b/src/mito2/src/flush.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; -use common_telemetry::{debug, error, info}; +use common_telemetry::{debug, error, info, trace}; use smallvec::SmallVec; use snafu::ResultExt; use store_api::storage::RegionId; @@ -32,7 +32,10 @@ use crate::error::{ Error, FlushRegionSnafu, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result, }; use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList}; -use crate::metrics::{FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL, INFLIGHT_FLUSH_COUNT}; +use crate::metrics::{ + FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL, + INFLIGHT_FLUSH_COUNT, +}; use crate::read::Source; use crate::region::options::IndexOptions; use crate::region::version::{VersionControlData, VersionControlRef}; @@ -138,17 +141,22 @@ impl WriteBufferManager for WriteBufferManagerImpl { // If the memory exceeds the buffer size, we trigger more aggressive // flush. But if already more than half memory is being flushed, // triggering more flush may not help. We will hold it instead. 
- if memory_usage >= self.global_write_buffer_size - && mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 - { - debug!( + if memory_usage >= self.global_write_buffer_size { + if mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 { + debug!( "Engine should flush (over total limit), memory_usage: {}, global_write_buffer_size: {}, \ mutable_usage: {}.", memory_usage, self.global_write_buffer_size, - mutable_memtable_memory_usage, - ); - return true; + mutable_memtable_memory_usage); + return true; + } else { + trace!( + "Engine won't flush, memory_usage: {}, global_write_buffer_size: {}, mutable_usage: {}.", + memory_usage, + self.global_write_buffer_size, + mutable_memtable_memory_usage); + } } false diff --git a/src/mito2/src/manifest/tests/checkpoint.rs b/src/mito2/src/manifest/tests/checkpoint.rs index 692f40422b..6f2c92bc5e 100644 --- a/src/mito2/src/manifest/tests/checkpoint.rs +++ b/src/mito2/src/manifest/tests/checkpoint.rs @@ -84,6 +84,7 @@ async fn manager_without_checkpoint() { // check files let mut expected = vec![ + "/", "00000000000000000010.json", "00000000000000000009.json", "00000000000000000008.json", @@ -130,6 +131,7 @@ async fn manager_with_checkpoint_distance_1() { // check files let mut expected = vec![ + "/", "00000000000000000009.checkpoint", "00000000000000000010.checkpoint", "00000000000000000010.json", diff --git a/src/mito2/src/memtable.rs b/src/mito2/src/memtable.rs index f7d05c621f..6adc6eb96a 100644 --- a/src/mito2/src/memtable.rs +++ b/src/mito2/src/memtable.rs @@ -110,6 +110,15 @@ impl MemtableStats { pub type BoxedBatchIterator = Box> + Send>; +/// Ranges in a memtable. +#[derive(Default)] +pub struct MemtableRanges { + /// Range IDs and ranges. + pub ranges: BTreeMap, + /// Statistics of the memtable at the query time. + pub stats: MemtableStats, +} + /// In memory write buffer. pub trait Memtable: Send + Sync + fmt::Debug { /// Returns the id of this memtable. @@ -139,7 +148,7 @@ pub trait Memtable: Send + Sync + fmt::Debug { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap; + ) -> MemtableRanges; /// Returns true if the memtable is empty. fn is_empty(&self) -> bool; diff --git a/src/mito2/src/memtable/bulk.rs b/src/mito2/src/memtable/bulk.rs index 46e757f3df..96e6c70acd 100644 --- a/src/mito2/src/memtable/bulk.rs +++ b/src/mito2/src/memtable/bulk.rs @@ -14,7 +14,6 @@ //! 
Memtable implementation for bulk load -use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; use store_api::metadata::RegionMetadataRef; @@ -25,7 +24,7 @@ use crate::error::Result; use crate::memtable::bulk::part::BulkPart; use crate::memtable::key_values::KeyValue; use crate::memtable::{ - BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRange, MemtableRef, MemtableStats, + BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRanges, MemtableRef, MemtableStats, }; #[allow(unused)] @@ -68,7 +67,7 @@ impl Memtable for BulkMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { todo!() } diff --git a/src/mito2/src/memtable/partition_tree.rs b/src/mito2/src/memtable/partition_tree.rs index 4c4b471643..1376f92331 100644 --- a/src/mito2/src/memtable/partition_tree.rs +++ b/src/mito2/src/memtable/partition_tree.rs @@ -23,7 +23,6 @@ mod shard; mod shard_builder; mod tree; -use std::collections::BTreeMap; use std::fmt; use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering}; use std::sync::Arc; @@ -41,7 +40,7 @@ use crate::memtable::partition_tree::tree::PartitionTree; use crate::memtable::stats::WriteMetrics; use crate::memtable::{ AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, - MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats, + MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, }; use crate::region::options::MergeMode; @@ -176,7 +175,7 @@ impl Memtable for PartitionTreeMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { let projection = projection.map(|ids| ids.to_vec()); let builder = Box::new(PartitionTreeIterBuilder { tree: self.tree.clone(), @@ -185,7 +184,10 @@ impl Memtable for PartitionTreeMemtable { }); let context = Arc::new(MemtableRangeContext::new(self.id, builder)); - [(0, MemtableRange::new(context))].into() + MemtableRanges { + ranges: [(0, MemtableRange::new(context))].into(), + stats: self.stats(), + } } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index 4959c468b6..8ef6f44121 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -45,7 +45,7 @@ use crate::memtable::key_values::KeyValue; use crate::memtable::stats::WriteMetrics; use crate::memtable::{ AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, - MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats, + MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, }; use crate::metrics::{READ_ROWS_TOTAL, READ_STAGE_ELAPSED}; use crate::read::dedup::LastNonNullIter; @@ -250,7 +250,7 @@ impl Memtable for TimeSeriesMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { let projection = if let Some(projection) = projection { projection.iter().copied().collect() } else { @@ -268,7 +268,10 @@ impl Memtable for TimeSeriesMemtable { }); let context = Arc::new(MemtableRangeContext::new(self.id, builder)); - [(0, MemtableRange::new(context))].into() + MemtableRanges { + ranges: [(0, MemtableRange::new(context))].into(), + stats: self.stats(), + } } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/read.rs b/src/mito2/src/read.rs index d8ac5ce46b..c4de103f10 100644 --- a/src/mito2/src/read.rs +++ 
b/src/mito2/src/read.rs @@ -861,6 +861,18 @@ impl BatchBuilder { } } +impl From for BatchBuilder { + fn from(batch: Batch) -> Self { + Self { + primary_key: batch.primary_key, + timestamps: Some(batch.timestamps), + sequences: Some(batch.sequences), + op_types: Some(batch.op_types), + fields: batch.fields, + } + } +} + /// Async [Batch] reader and iterator wrapper. /// /// This is the data source for SST writers or internal readers. diff --git a/src/mito2/src/read/dedup.rs b/src/mito2/src/read/dedup.rs index c77d0c3fab..a29781b947 100644 --- a/src/mito2/src/read/dedup.rs +++ b/src/mito2/src/read/dedup.rs @@ -224,6 +224,12 @@ pub(crate) struct DedupMetrics { } /// Buffer to store fields in the last row to merge. +/// +/// Usage: +/// We should call `maybe_init()` to initialize the builder and then call `push_first_row()` +/// to push the first row of batches that the timestamp is the same as the row in this builder. +/// Finally we should call `merge_last_non_null()` to merge the last non-null fields and +/// return the merged batch. struct LastFieldsBuilder { /// Filter deleted rows. filter_deleted: bool, @@ -311,6 +317,16 @@ impl LastFieldsBuilder { return; } + // Both `maybe_init()` and `push_first_row()` can update the builder. If the delete + // op is not in the latest row, then we can't set the deletion flag in the `maybe_init()`. + // We must check the batch and update the deletion flag here to prevent + // the builder from merging non-null fields in rows that insert before the deleted row. + self.contains_deletion = batch.op_types().get_data(0).unwrap() == OpType::Delete as u8; + if self.contains_deletion { + // Deletes this row. + return; + } + let fields = batch.fields(); for (idx, value) in self.last_fields.iter_mut().enumerate() { if value.is_null() && !fields[idx].data.is_null(0) { @@ -323,7 +339,8 @@ impl LastFieldsBuilder { } /// Merges last non-null fields, builds a new batch and resets the builder. - /// It may overwrites the last row of the `buffer`. + /// It may overwrites the last row of the `buffer`. The `buffer` is the batch + /// that initialized the builder. 
fn merge_last_non_null( &mut self, buffer: Batch, @@ -1082,6 +1099,32 @@ mod tests { ); } + #[test] + fn test_last_non_null_strategy_delete_middle() { + let input = [ + new_batch_multi_fields(b"k1", &[1], &[7], &[OpType::Put], &[(Some(11), None)]), + new_batch_multi_fields(b"k1", &[1], &[4], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[1], &[1], &[OpType::Put], &[(Some(12), Some(1))]), + new_batch_multi_fields(b"k1", &[2], &[8], &[OpType::Put], &[(Some(21), None)]), + new_batch_multi_fields(b"k1", &[2], &[5], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[2], &[2], &[OpType::Put], &[(Some(22), Some(2))]), + new_batch_multi_fields(b"k1", &[3], &[9], &[OpType::Put], &[(Some(31), None)]), + new_batch_multi_fields(b"k1", &[3], &[6], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[3], &[3], &[OpType::Put], &[(Some(32), Some(3))]), + ]; + + let mut strategy = LastNonNull::new(true); + check_dedup_strategy( + &input, + &mut strategy, + &[ + new_batch_multi_fields(b"k1", &[1], &[7], &[OpType::Put], &[(Some(11), None)]), + new_batch_multi_fields(b"k1", &[2], &[8], &[OpType::Put], &[(Some(21), None)]), + new_batch_multi_fields(b"k1", &[3], &[9], &[OpType::Put], &[(Some(31), None)]), + ], + ); + } + #[test] fn test_last_non_null_iter_on_batch() { let input = [new_batch_multi_fields( diff --git a/src/mito2/src/read/last_row.rs b/src/mito2/src/read/last_row.rs index 79d035e032..1e2a6a5844 100644 --- a/src/mito2/src/read/last_row.rs +++ b/src/mito2/src/read/last_row.rs @@ -27,7 +27,7 @@ use crate::cache::{ use crate::error::Result; use crate::read::{Batch, BatchReader, BoxedBatchReader}; use crate::sst::file::FileId; -use crate::sst::parquet::reader::RowGroupReader; +use crate::sst::parquet::reader::{ReaderMetrics, RowGroupReader}; /// Reader to keep the last row for each time series. /// It assumes that batches from the input reader are @@ -115,6 +115,14 @@ impl RowGroupLastRowCachedReader { } } + /// Gets the underlying reader metrics if uncached. + pub(crate) fn metrics(&self) -> Option<&ReaderMetrics> { + match self { + RowGroupLastRowCachedReader::Hit(_) => None, + RowGroupLastRowCachedReader::Miss(reader) => Some(reader.metrics()), + } + } + /// Creates new Hit variant and updates metrics. fn new_hit(value: Arc) -> Self { selector_result_cache_hit(); @@ -234,6 +242,10 @@ impl RowGroupLastRowReader { }); cache.put_selector_result(self.key, value); } + + fn metrics(&self) -> &ReaderMetrics { + self.reader.metrics() + } } /// Push last row into `yielded_batches`. diff --git a/src/mito2/src/read/prune.rs b/src/mito2/src/read/prune.rs index cb0066e734..500cd14302 100644 --- a/src/mito2/src/read/prune.rs +++ b/src/mito2/src/read/prune.rs @@ -72,11 +72,21 @@ impl PruneReader { self.source = source; } - pub(crate) fn metrics(&mut self) -> &ReaderMetrics { + /// Merge metrics with the inner reader and return the merged metrics. 
+ pub(crate) fn metrics(&self) -> ReaderMetrics { + let mut metrics = self.metrics.clone(); match &self.source { - Source::RowGroup(r) => r.metrics(), - Source::LastRow(_) => &self.metrics, + Source::RowGroup(r) => { + metrics.merge_from(r.metrics()); + } + Source::LastRow(r) => { + if let Some(inner_metrics) = r.metrics() { + metrics.merge_from(inner_metrics); + } + } } + + metrics } pub(crate) async fn next_batch(&mut self) -> Result> { diff --git a/src/mito2/src/read/range.rs b/src/mito2/src/read/range.rs index bdad5f8fef..1b29e196a2 100644 --- a/src/mito2/src/read/range.rs +++ b/src/mito2/src/read/range.rs @@ -24,7 +24,7 @@ use store_api::region_engine::PartitionRange; use crate::cache::CacheManager; use crate::error::Result; -use crate::memtable::{MemtableRange, MemtableRef}; +use crate::memtable::{MemtableRange, MemtableRanges, MemtableStats}; use crate::read::scan_region::ScanInput; use crate::sst::file::{overlaps, FileHandle, FileTimeRange}; use crate::sst::parquet::file_range::{FileRange, FileRangeContextRef}; @@ -175,7 +175,7 @@ impl RangeMeta { } } - fn push_unordered_mem_ranges(memtables: &[MemtableRef], ranges: &mut Vec) { + fn push_unordered_mem_ranges(memtables: &[MemRangeBuilder], ranges: &mut Vec) { // For append mode, we can parallelize reading memtables. for (memtable_index, memtable) in memtables.iter().enumerate() { let stats = memtable.stats(); @@ -270,7 +270,7 @@ impl RangeMeta { } } - fn push_seq_mem_ranges(memtables: &[MemtableRef], ranges: &mut Vec) { + fn push_seq_mem_ranges(memtables: &[MemRangeBuilder], ranges: &mut Vec) { // For non append-only mode, each range only contains one memtable by default. for (i, memtable) in memtables.iter().enumerate() { let stats = memtable.stats(); @@ -421,29 +421,38 @@ impl FileRangeBuilder { /// Builder to create mem ranges. pub(crate) struct MemRangeBuilder { /// Ranges of a memtable. - row_groups: BTreeMap, + ranges: MemtableRanges, } impl MemRangeBuilder { /// Builds a mem range builder from row groups. - pub(crate) fn new(row_groups: BTreeMap) -> Self { - Self { row_groups } + pub(crate) fn new(ranges: MemtableRanges) -> Self { + Self { ranges } } /// Builds mem ranges to read in the memtable. /// Negative `row_group_index` indicates all row groups. - fn build_ranges(&self, row_group_index: i64, ranges: &mut SmallVec<[MemtableRange; 2]>) { + pub(crate) fn build_ranges( + &self, + row_group_index: i64, + ranges: &mut SmallVec<[MemtableRange; 2]>, + ) { if row_group_index >= 0 { let row_group_index = row_group_index as usize; // Scans one row group. - let Some(range) = self.row_groups.get(&row_group_index) else { + let Some(range) = self.ranges.ranges.get(&row_group_index) else { return; }; ranges.push(range.clone()); } else { - ranges.extend(self.row_groups.values().cloned()); + ranges.extend(self.ranges.ranges.values().cloned()); } } + + /// Returns the statistics of the memtable. + pub(crate) fn stats(&self) -> &MemtableStats { + &self.ranges.stats + } } /// List to manages the builders to create file ranges. @@ -451,18 +460,15 @@ impl MemRangeBuilder { /// the list to different streams in the same partition. pub(crate) struct RangeBuilderList { num_memtables: usize, - mem_builders: Mutex>>, file_builders: Mutex>>>, } impl RangeBuilderList { /// Creates a new [ReaderBuilderList] with the given number of memtables and files. 
pub(crate) fn new(num_memtables: usize, num_files: usize) -> Self { - let mem_builders = (0..num_memtables).map(|_| None).collect(); let file_builders = (0..num_files).map(|_| None).collect(); Self { num_memtables, - mem_builders: Mutex::new(mem_builders), file_builders: Mutex::new(file_builders), } } @@ -488,26 +494,6 @@ impl RangeBuilderList { Ok(ranges) } - /// Builds mem ranges to read the row group at `index`. - pub(crate) fn build_mem_ranges( - &self, - input: &ScanInput, - index: RowGroupIndex, - ) -> SmallVec<[MemtableRange; 2]> { - let mut ranges = SmallVec::new(); - let mut mem_builders = self.mem_builders.lock().unwrap(); - match &mut mem_builders[index.index] { - Some(builder) => builder.build_ranges(index.row_group_index, &mut ranges), - None => { - let builder = input.prune_memtable(index.index); - builder.build_ranges(index.row_group_index, &mut ranges); - mem_builders[index.index] = Some(builder); - } - } - - ranges - } - fn get_file_builder(&self, index: usize) -> Option> { let file_builders = self.file_builders.lock().unwrap(); file_builders[index].clone() diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 19324f119f..091b9bc48c 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -24,6 +24,7 @@ use common_recordbatch::SendableRecordBatchStream; use common_telemetry::{debug, error, tracing, warn}; use common_time::range::TimestampRange; use datafusion_expr::utils::expr_to_columns; +use smallvec::SmallVec; use store_api::region_engine::{PartitionRange, RegionScannerRef}; use store_api::storage::{ScanRequest, TimeSeriesRowSelector}; use table::predicate::{build_time_range_predicate, Predicate}; @@ -35,7 +36,7 @@ use crate::cache::file_cache::FileCacheRef; use crate::cache::CacheManagerRef; use crate::config::DEFAULT_SCAN_CHANNEL_SIZE; use crate::error::Result; -use crate::memtable::MemtableRef; +use crate::memtable::MemtableRange; use crate::metrics::READ_SST_COUNT; use crate::read::compat::{self, CompatBatch}; use crate::read::projection::ProjectionMapper; @@ -328,6 +329,14 @@ impl ScanRegion { Some(p) => ProjectionMapper::new(&self.version.metadata, p.iter().copied())?, None => ProjectionMapper::all(&self.version.metadata)?, }; + // Get memtable ranges to scan. + let memtables = memtables + .into_iter() + .map(|mem| { + let ranges = mem.ranges(Some(mapper.column_ids()), Some(predicate.clone())); + MemRangeBuilder::new(ranges) + }) + .collect(); let input = ScanInput::new(self.access_layer, mapper) .with_time_range(Some(time_range)) @@ -346,8 +355,8 @@ impl ScanRegion { Ok(input) } - /// Build time range predicate from filters, also remove time filters from request. - fn build_time_range_predicate(&mut self) -> TimestampRange { + /// Build time range predicate from filters. + fn build_time_range_predicate(&self) -> TimestampRange { let time_index = self.version.metadata.time_index_column(); let unit = time_index .column_schema @@ -355,11 +364,7 @@ impl ScanRegion { .as_timestamp() .expect("Time index must have timestamp-compatible type") .unit(); - build_time_range_predicate( - &time_index.column_schema.name, - unit, - &mut self.request.filters, - ) + build_time_range_predicate(&time_index.column_schema.name, unit, &self.request.filters) } /// Remove field filters if the merge mode is [MergeMode::LastNonNull]. 
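The hunks above and below move memtable pruning out of `RangeBuilderList`: the scan path now calls `Memtable::ranges` once per memtable and keeps the resulting `MemtableRanges` (range map plus query-time stats) inside a `MemRangeBuilder`. A rough sketch of that step, reusing names from the diff but simplifying the surrounding function:

fn build_mem_range_builders(
    memtables: Vec<MemtableRef>,
    column_ids: &[ColumnId],
    predicate: Predicate,
) -> Vec<MemRangeBuilder> {
    memtables
        .into_iter()
        .map(|mem| {
            // `ranges` now also captures the memtable stats, so later calls to
            // `MemRangeBuilder::stats` need no extra lookup on the memtable.
            let ranges = mem.ranges(Some(column_ids), Some(predicate.clone()));
            MemRangeBuilder::new(ranges)
        })
        .collect()
}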
@@ -413,11 +418,15 @@ impl ScanRegion { .and_then(|c| c.index_cache()) .cloned(); + let puffin_metadata_cache = self + .cache_manager + .as_ref() + .and_then(|c| c.puffin_metadata_cache()) + .cloned(); + InvertedIndexApplierBuilder::new( self.access_layer.region_dir().to_string(), self.access_layer.object_store().clone(), - file_cache, - index_cache, self.version.metadata.as_ref(), self.version.metadata.inverted_indexed_column_ids( self.version @@ -429,6 +438,9 @@ impl ScanRegion { ), self.access_layer.puffin_manager_factory().clone(), ) + .with_file_cache(file_cache) + .with_index_cache(index_cache) + .with_puffin_metadata_cache(puffin_metadata_cache) .build(&self.request.filters) .inspect_err(|err| warn!(err; "Failed to build invereted index applier")) .ok() @@ -477,8 +489,8 @@ pub(crate) struct ScanInput { time_range: Option, /// Predicate to push down. pub(crate) predicate: Option, - /// Memtables to scan. - pub(crate) memtables: Vec, + /// Memtable range builders for memtables in the time range.. + pub(crate) memtables: Vec, /// Handles to SST files to scan. pub(crate) files: Vec, /// Cache. @@ -540,9 +552,9 @@ impl ScanInput { self } - /// Sets memtables to read. + /// Sets memtable range builders. #[must_use] - pub(crate) fn with_memtables(mut self, memtables: Vec) -> Self { + pub(crate) fn with_memtables(mut self, memtables: Vec) -> Self { self.memtables = memtables; self } @@ -660,11 +672,12 @@ impl ScanInput { Ok(sources) } - /// Prunes a memtable to scan and returns the builder to build readers. - pub(crate) fn prune_memtable(&self, mem_index: usize) -> MemRangeBuilder { - let memtable = &self.memtables[mem_index]; - let row_groups = memtable.ranges(Some(self.mapper.column_ids()), self.predicate.clone()); - MemRangeBuilder::new(row_groups) + /// Builds memtable ranges to scan by `index`. + pub(crate) fn build_mem_ranges(&self, index: RowGroupIndex) -> SmallVec<[MemtableRange; 2]> { + let memtable = &self.memtables[index.index]; + let mut ranges = SmallVec::new(); + memtable.build_ranges(index.row_group_index, &mut ranges); + ranges } /// Prunes a file to scan and returns the builder to build readers. @@ -678,7 +691,6 @@ impl ScanInput { .access_layer .read_sst(file.clone()) .predicate(self.predicate.clone()) - .time_range(self.time_range) .projection(Some(self.mapper.column_ids().to_vec())) .cache(self.cache_manager.clone()) .inverted_index_applier(self.inverted_index_applier.clone()) diff --git a/src/mito2/src/read/scan_util.rs b/src/mito2/src/read/scan_util.rs index df790d191a..77a9bb1612 100644 --- a/src/mito2/src/read/scan_util.rs +++ b/src/mito2/src/read/scan_util.rs @@ -137,10 +137,9 @@ pub(crate) fn scan_mem_ranges( part_metrics: PartitionMetrics, index: RowGroupIndex, time_range: FileTimeRange, - range_builder_list: Arc, ) -> impl Stream> { try_stream! 
{ - let ranges = range_builder_list.build_mem_ranges(&stream_ctx.input, index); + let ranges = stream_ctx.input.build_mem_ranges(index); part_metrics.inc_num_mem_ranges(ranges.len()); for range in ranges { let build_reader_start = Instant::now(); @@ -181,8 +180,9 @@ pub(crate) fn scan_file_ranges( } yield batch; } - if let Source::PruneReader(mut reader) = source { - reader_metrics.merge_from(reader.metrics()); + if let Source::PruneReader(reader) = source { + let prune_metrics = reader.metrics(); + reader_metrics.merge_from(&prune_metrics); } } diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index bdf3a7d6b8..ca9291c0f6 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -403,7 +403,6 @@ fn build_sources( part_metrics.clone(), *index, range_meta.time_range, - range_builder_list.clone(), ); Box::pin(stream) as _ } else { diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index 60e5ca5c7c..28e7d64add 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -97,7 +97,6 @@ impl UnorderedScan { part_metrics.clone(), *index, range_meta.time_range, - range_builder_list.clone(), ); for await batch in stream { yield batch; diff --git a/src/mito2/src/schedule/remote_job_scheduler.rs b/src/mito2/src/schedule/remote_job_scheduler.rs index 8f51a774d5..bfe31ef041 100644 --- a/src/mito2/src/schedule/remote_job_scheduler.rs +++ b/src/mito2/src/schedule/remote_job_scheduler.rs @@ -27,7 +27,7 @@ use crate::compaction::compactor::CompactionRegion; use crate::compaction::picker::PickerOutput; use crate::error::{CompactRegionSnafu, Error, ParseJobIdSnafu, Result}; use crate::manifest::action::RegionEdit; -use crate::metrics::COMPACTION_FAILURE_COUNT; +use crate::metrics::{COMPACTION_FAILURE_COUNT, INFLIGHT_COMPACTION_COUNT}; use crate::request::{ BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest, }; @@ -145,6 +145,7 @@ impl DefaultNotifier { #[async_trait::async_trait] impl Notifier for DefaultNotifier { async fn notify(&self, result: RemoteJobResult, waiters: Vec) { + INFLIGHT_COMPACTION_COUNT.dec(); match result { RemoteJobResult::CompactionJobResult(result) => { let notify = { diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 451ec44f1c..5a9932ab43 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -146,12 +146,33 @@ pub enum IndexType { } impl FileMeta { + /// Returns true if the file has an inverted index pub fn inverted_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::InvertedIndex) } + + /// Returns true if the file has a fulltext index pub fn fulltext_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::FulltextIndex) } + + /// Returns the size of the inverted index file + pub fn inverted_index_size(&self) -> Option { + if self.available_indexes.len() == 1 && self.inverted_index_available() { + Some(self.index_file_size) + } else { + None + } + } + + /// Returns the size of the fulltext index file + pub fn fulltext_index_size(&self) -> Option { + if self.available_indexes.len() == 1 && self.fulltext_index_available() { + Some(self.index_file_size) + } else { + None + } + } } /// Handle to a SST file. 
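These size accessors return `Some` only when the named index is the sole index in the puffin file, presumably because `index_file_size` covers the whole file and is otherwise not a faithful size for any single index. A later hunk in `parquet/reader.rs` feeds the value into `InvertedIndexApplier::apply` as a size hint; a condensed, illustrative sketch of that call path (names from the diff, everything else simplified):

async fn apply_with_size_hint(
    applier: &InvertedIndexApplier,
    meta: &FileMeta,
    file_id: FileId,
) -> Result<ApplyOutput> {
    // `None` when several index types share the file; the applier then falls back
    // to reading the blob metadata to learn the content length.
    let file_size_hint = meta.inverted_index_size();
    applier.apply(file_id, file_size_hint).await
}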
diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index 76c7a71503..81251c91a5 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -185,7 +185,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.is_exist(&path).await.unwrap()); + assert!(!object_store.exists(&path).await.unwrap()); } #[tokio::test] @@ -247,7 +247,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.is_exist(&path).await.unwrap()); - assert!(!object_store.is_exist(&index_path).await.unwrap()); + assert!(!object_store.exists(&path).await.unwrap()); + assert!(!object_store.exists(&index_path).await.unwrap()); } } diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index a4f4ab9e44..1972f3d7ab 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -18,7 +18,7 @@ pub(crate) mod intermediate; pub(crate) mod inverted_index; pub(crate) mod puffin_manager; mod statistics; -mod store; +pub(crate) mod store; use std::num::NonZeroUsize; diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index cac3ffedd7..0542fd7a59 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -16,19 +16,23 @@ pub mod builder; use std::sync::Arc; +use common_base::range_read::RangeReader; use common_telemetry::warn; use index::inverted_index::format::reader::InvertedIndexBlobReader; use index::inverted_index::search::index_apply::{ ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext, }; use object_store::ObjectStore; +use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader}; use snafu::ResultExt; use store_api::storage::RegionId; use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey}; use crate::cache::index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef}; -use crate::error::{ApplyInvertedIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result}; +use crate::error::{ + ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result, +}; use crate::metrics::{INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE}; use crate::sst::file::FileId; use crate::sst::index::inverted_index::INDEX_BLOB_TYPE; @@ -60,6 +64,9 @@ pub(crate) struct InvertedIndexApplier { /// In-memory cache for inverted index. inverted_index_cache: Option, + + /// Puffin metadata cache. + puffin_metadata_cache: Option, } pub(crate) type InvertedIndexApplierRef = Arc; @@ -70,8 +77,6 @@ impl InvertedIndexApplier { region_dir: String, region_id: RegionId, store: ObjectStore, - file_cache: Option, - index_cache: Option, index_applier: Box, puffin_manager_factory: PuffinManagerFactory, ) -> Self { @@ -81,15 +86,37 @@ impl InvertedIndexApplier { region_dir, region_id, store, - file_cache, + file_cache: None, index_applier, puffin_manager_factory, - inverted_index_cache: index_cache, + inverted_index_cache: None, + puffin_metadata_cache: None, } } + /// Sets the file cache. + pub fn with_file_cache(mut self, file_cache: Option) -> Self { + self.file_cache = file_cache; + self + } + + /// Sets the index cache. + pub fn with_index_cache(mut self, index_cache: Option) -> Self { + self.inverted_index_cache = index_cache; + self + } + + /// Sets the puffin metadata cache. 
+ pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } + /// Applies predicates to the provided SST file id and returns the relevant row group ids - pub async fn apply(&self, file_id: FileId) -> Result { + pub async fn apply(&self, file_id: FileId, file_size_hint: Option) -> Result { let _timer = INDEX_APPLY_ELAPSED .with_label_values(&[TYPE_INVERTED_INDEX]) .start_timer(); @@ -99,19 +126,25 @@ impl InvertedIndexApplier { index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, }; - let blob = match self.cached_blob_reader(file_id).await { + let mut blob = match self.cached_blob_reader(file_id).await { Ok(Some(puffin_reader)) => puffin_reader, other => { if let Err(err) = other { warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.") } - self.remote_blob_reader(file_id).await? + self.remote_blob_reader(file_id, file_size_hint).await? } }; if let Some(index_cache) = &self.inverted_index_cache { + let file_size = if let Some(file_size) = file_size_hint { + file_size + } else { + blob.metadata().await.context(MetadataSnafu)?.content_length + }; let mut index_reader = CachedInvertedIndexBlobReader::new( file_id, + file_size, InvertedIndexBlobReader::new(blob), index_cache.clone(), ); @@ -156,13 +189,22 @@ impl InvertedIndexApplier { } /// Creates a blob reader from the remote index file. - async fn remote_blob_reader(&self, file_id: FileId) -> Result { - let puffin_manager = self.puffin_manager_factory.build(self.store.clone()); + async fn remote_blob_reader( + &self, + file_id: FileId, + file_size_hint: Option, + ) -> Result { + let puffin_manager = self + .puffin_manager_factory + .build(self.store.clone()) + .with_puffin_metadata_cache(self.puffin_metadata_cache.clone()); + let file_path = location::index_file_path(&self.region_dir, file_id); puffin_manager .reader(&file_path) .await .context(PuffinBuildReaderSnafu)? + .with_file_size_hint(file_size_hint) .blob(INDEX_BLOB_TYPE) .await .context(PuffinReadBlobSnafu)? 
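With the caches moved out of the constructor, building an applier becomes a small builder chain followed by the new two-argument `apply`. A usage sketch under the diff's API; the wrapper function, its parameters, and the exact boxed trait object type are illustrative assumptions:

async fn apply_inverted_index(
    region_dir: String,
    region_id: RegionId,
    store: ObjectStore,
    index_applier: Box<dyn IndexApplier>,
    puffin_manager_factory: PuffinManagerFactory,
    file_cache: Option<FileCacheRef>,
    index_cache: Option<InvertedIndexCacheRef>,
    puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
    file_id: FileId,
    file_size_hint: Option<u64>,
) -> Result<ApplyOutput> {
    let applier = InvertedIndexApplier::new(
        region_dir,
        region_id,
        store,
        index_applier,
        puffin_manager_factory,
    )
    // Each setter takes an `Option`, so callers without a cache simply pass `None`.
    .with_file_cache(file_cache)
    .with_index_cache(index_cache)
    .with_puffin_metadata_cache(puffin_metadata_cache);

    applier.apply(file_id, file_size_hint).await
}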
@@ -219,12 +261,10 @@ mod tests { region_dir.clone(), RegionId::new(0, 0), object_store, - None, - None, Box::new(mock_index_applier), puffin_manager_factory, ); - let output = sst_index_applier.apply(file_id).await.unwrap(); + let output = sst_index_applier.apply(file_id, None).await.unwrap(); assert_eq!( output, ApplyOutput { @@ -261,12 +301,10 @@ mod tests { region_dir.clone(), RegionId::new(0, 0), object_store, - None, - None, Box::new(mock_index_applier), puffin_manager_factory, ); - let res = sst_index_applier.apply(file_id).await; + let res = sst_index_applier.apply(file_id, None).await; assert!(format!("{:?}", res.unwrap_err()).contains("Blob not found")); } } diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder.rs b/src/mito2/src/sst/index/inverted_index/applier/builder.rs index 603cf5aa23..653679b9fc 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder.rs @@ -28,6 +28,7 @@ use datatypes::value::Value; use index::inverted_index::search::index_apply::PredicatesIndexApplier; use index::inverted_index::search::predicate::Predicate; use object_store::ObjectStore; +use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use snafu::{OptionExt, ResultExt}; use store_api::metadata::RegionMetadata; use store_api::storage::ColumnId; @@ -65,6 +66,9 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> { /// Cache for inverted index. index_cache: Option, + + /// Cache for puffin metadata. + puffin_metadata_cache: Option, } impl<'a> InvertedIndexApplierBuilder<'a> { @@ -72,8 +76,6 @@ impl<'a> InvertedIndexApplierBuilder<'a> { pub fn new( region_dir: String, object_store: ObjectStore, - file_cache: Option, - index_cache: Option, metadata: &'a RegionMetadata, indexed_column_ids: HashSet, puffin_manager_factory: PuffinManagerFactory, @@ -81,15 +83,37 @@ impl<'a> InvertedIndexApplierBuilder<'a> { Self { region_dir, object_store, - file_cache, metadata, indexed_column_ids, output: HashMap::default(), - index_cache, puffin_manager_factory, + file_cache: None, + index_cache: None, + puffin_metadata_cache: None, } } + /// Sets the file cache. + pub fn with_file_cache(mut self, file_cache: Option) -> Self { + self.file_cache = file_cache; + self + } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } + + /// Sets the index cache. + pub fn with_index_cache(mut self, index_cache: Option) -> Self { + self.index_cache = index_cache; + self + } + /// Consumes the builder to construct an [`InvertedIndexApplier`], optionally returned based on /// the expressions provided. If no predicates match, returns `None`. 
pub fn build(mut self, exprs: &[Expr]) -> Result> { @@ -108,15 +132,18 @@ impl<'a> InvertedIndexApplierBuilder<'a> { .collect(); let applier = PredicatesIndexApplier::try_from(predicates); - Ok(Some(InvertedIndexApplier::new( - self.region_dir, - self.metadata.region_id, - self.object_store, - self.file_cache, - self.index_cache, - Box::new(applier.context(BuildIndexApplierSnafu)?), - self.puffin_manager_factory, - ))) + Ok(Some( + InvertedIndexApplier::new( + self.region_dir, + self.metadata.region_id, + self.object_store, + Box::new(applier.context(BuildIndexApplierSnafu)?), + self.puffin_manager_factory, + ) + .with_file_cache(self.file_cache) + .with_puffin_metadata_cache(self.puffin_metadata_cache) + .with_index_cache(self.index_cache), + )) } /// Recursively traverses expressions to collect predicates. @@ -322,8 +349,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs index 0a196e6f1a..51f7f001e2 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs @@ -75,8 +75,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -118,8 +116,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -144,8 +140,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -187,8 +181,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -214,8 +206,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs index cdaec9f94e..138b15b82e 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs @@ -231,8 +231,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -260,8 +258,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -280,8 +276,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -315,8 +309,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs 
index 1d07cca487..35a5caad56 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs @@ -137,8 +137,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -175,8 +173,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -204,8 +200,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -224,8 +218,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -244,8 +236,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -303,8 +293,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -341,8 +329,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs index 6a520ba401..224e10c452 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs @@ -68,8 +68,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -101,8 +99,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -126,8 +122,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -159,8 +153,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -186,8 +178,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs index 7fdf7f3de5..7148986e6d 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs @@ -62,8 +62,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -91,8 +89,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -120,8 +116,6 @@ mod 
tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -142,8 +136,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 6db1ef6e0b..15cba55c44 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -310,12 +310,14 @@ mod tests { use futures::future::BoxFuture; use object_store::services::Memory; use object_store::ObjectStore; + use puffin::puffin_manager::cache::PuffinMetadataCache; use puffin::puffin_manager::PuffinManager; use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder}; use store_api::storage::RegionId; use super::*; use crate::cache::index::InvertedIndexCache; + use crate::metrics::CACHE_BYTES; use crate::read::BatchColumn; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder; @@ -446,22 +448,23 @@ mod tests { move |expr| { let _d = &d; - let cache = Arc::new(InvertedIndexCache::new(10, 10)); + let cache = Arc::new(InvertedIndexCache::new(10, 10, 100)); + let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES)); let applier = InvertedIndexApplierBuilder::new( region_dir.clone(), object_store.clone(), - None, - Some(cache), ®ion_metadata, indexed_column_ids.clone(), factory.clone(), ) + .with_index_cache(Some(cache)) + .with_puffin_metadata_cache(Some(puffin_metadata_cache)) .build(&[expr]) .unwrap() .unwrap(); Box::pin(async move { applier - .apply(sst_file_id) + .apply(sst_file_id, None) .await .unwrap() .matched_segment_ids diff --git a/src/mito2/src/sst/index/store.rs b/src/mito2/src/sst/index/store.rs index 2750c69fc2..7322bd4db4 100644 --- a/src/mito2/src/sst/index/store.rs +++ b/src/mito2/src/sst/index/store.rs @@ -68,6 +68,7 @@ impl InstrumentedStore { path: path.to_string(), read_byte_count, read_count, + file_size_hint: None, }) } @@ -262,15 +263,27 @@ pub(crate) struct InstrumentedRangeReader<'a> { path: String, read_byte_count: &'a IntCounter, read_count: &'a IntCounter, + file_size_hint: Option, } #[async_trait] impl RangeReader for InstrumentedRangeReader<'_> { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + self.file_size_hint = Some(file_size_hint); + } + async fn metadata(&mut self) -> io::Result { - let stat = self.store.stat(&self.path).await?; - Ok(Metadata { - content_length: stat.content_length(), - }) + match self.file_size_hint { + Some(file_size_hint) => Ok(Metadata { + content_length: file_size_hint, + }), + None => { + let stat = self.store.stat(&self.path).await?; + Ok(Metadata { + content_length: stat.content_length(), + }) + } + } } async fn read(&mut self, range: Range) -> io::Result { diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index ae51a0d37c..3dd53ba645 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -24,7 +24,7 @@ use crate::sst::index::IndexOutput; use crate::sst::DEFAULT_WRITE_BUFFER_SIZE; pub(crate) mod file_range; -pub(crate) mod format; +pub mod format; pub(crate) mod helper; pub(crate) mod metadata; mod page_reader; diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 
b73026a7a6..39153fce8d 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -23,11 +23,7 @@ use api::v1::SemanticType; use async_trait::async_trait; use common_recordbatch::filter::SimpleFilterEvaluator; use common_telemetry::{debug, warn}; -use common_time::range::TimestampRange; -use common_time::timestamp::TimeUnit; -use common_time::Timestamp; -use datafusion_common::ScalarValue; -use datafusion_expr::{Expr, Operator}; +use datafusion_expr::Expr; use datatypes::arrow::record_batch::RecordBatch; use datatypes::data_type::ConcreteDataType; use itertools::Itertools; @@ -42,7 +38,6 @@ use store_api::storage::ColumnId; use table::predicate::Predicate; use crate::cache::CacheManagerRef; -use crate::error; use crate::error::{ ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, ReadParquetSnafu, Result, }; @@ -74,8 +69,6 @@ pub struct ParquetReaderBuilder { object_store: ObjectStore, /// Predicate to push down. predicate: Option, - /// Time range to filter. - time_range: Option, /// Metadata of columns to read. /// /// `None` reads all columns. Due to schema change, the projection @@ -104,7 +97,6 @@ impl ParquetReaderBuilder { file_handle, object_store, predicate: None, - time_range: None, projection: None, cache_manager: None, inverted_index_applier: None, @@ -120,13 +112,6 @@ impl ParquetReaderBuilder { self } - /// Attaches the time range to the builder. - #[must_use] - pub fn time_range(mut self, time_range: Option) -> ParquetReaderBuilder { - self.time_range = time_range; - self - } - /// Attaches the projection to the builder. /// /// The reader only applies the projection to fields. @@ -238,7 +223,7 @@ impl ParquetReaderBuilder { cache_manager: self.cache_manager.clone(), }; - let mut filters = if let Some(predicate) = &self.predicate { + let filters = if let Some(predicate) = &self.predicate { predicate .exprs() .iter() @@ -254,10 +239,6 @@ impl ParquetReaderBuilder { vec![] }; - if let Some(time_range) = &self.time_range { - filters.extend(time_range_to_predicate(*time_range, ®ion_meta)?); - } - let codec = McmpRowCodec::new( read_format .metadata() @@ -475,8 +456,11 @@ impl ParquetReaderBuilder { if !self.file_handle.meta_ref().inverted_index_available() { return false; } - - let apply_output = match index_applier.apply(self.file_handle.file_id()).await { + let file_size_hint = self.file_handle.meta_ref().inverted_index_size(); + let apply_output = match index_applier + .apply(self.file_handle.file_id(), file_size_hint) + .await + { Ok(output) => output, Err(err) => { if cfg!(any(test, feature = "test")) { @@ -675,59 +659,6 @@ impl ParquetReaderBuilder { } } -/// Transforms time range into [SimpleFilterEvaluator]. 
-fn time_range_to_predicate( - time_range: TimestampRange, - metadata: &RegionMetadataRef, -) -> Result> { - let ts_col = metadata.time_index_column(); - let ts_col_id = ts_col.column_id; - - let ts_to_filter = |op: Operator, timestamp: &Timestamp| { - let value = match timestamp.unit() { - TimeUnit::Second => ScalarValue::TimestampSecond(Some(timestamp.value()), None), - TimeUnit::Millisecond => { - ScalarValue::TimestampMillisecond(Some(timestamp.value()), None) - } - TimeUnit::Microsecond => { - ScalarValue::TimestampMicrosecond(Some(timestamp.value()), None) - } - TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(Some(timestamp.value()), None), - }; - let evaluator = SimpleFilterEvaluator::new(ts_col.column_schema.name.clone(), value, op) - .context(error::BuildTimeRangeFilterSnafu { - timestamp: *timestamp, - })?; - Ok(SimpleFilterContext::new( - evaluator, - ts_col_id, - SemanticType::Timestamp, - ts_col.column_schema.data_type.clone(), - )) - }; - - let predicates = match (time_range.start(), time_range.end()) { - (Some(start), Some(end)) => { - vec![ - ts_to_filter(Operator::GtEq, start)?, - ts_to_filter(Operator::Lt, end)?, - ] - } - - (Some(start), None) => { - vec![ts_to_filter(Operator::GtEq, start)?] - } - - (None, Some(end)) => { - vec![ts_to_filter(Operator::Lt, end)?] - } - (None, None) => { - vec![] - } - }; - Ok(predicates) -} - /// Metrics of filtering rows groups and rows. #[derive(Debug, Default, Clone, Copy)] pub(crate) struct ReaderFilterMetrics { @@ -915,10 +846,10 @@ enum ReaderState { impl ReaderState { /// Returns the metrics of the reader. - fn metrics(&mut self) -> &ReaderMetrics { + fn metrics(&self) -> ReaderMetrics { match self { ReaderState::Readable(reader) => reader.metrics(), - ReaderState::Exhausted(m) => m, + ReaderState::Exhausted(m) => m.clone(), } } } @@ -936,20 +867,6 @@ pub(crate) struct SimpleFilterContext { } impl SimpleFilterContext { - fn new( - filter: SimpleFilterEvaluator, - column_id: ColumnId, - semantic_type: SemanticType, - data_type: ConcreteDataType, - ) -> Self { - Self { - filter, - column_id, - semantic_type, - data_type, - } - } - /// Creates a context for the `expr`. 
/// /// Returns None if the column to filter doesn't exist in the SST metadata or the diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs index dec175e76f..314e886ba9 100644 --- a/src/mito2/src/test_util.rs +++ b/src/mito2/src/test_util.rs @@ -35,8 +35,7 @@ use api::v1::{OpType, Row, Rows, SemanticType}; use common_base::readable_size::ReadableSize; use common_base::Plugins; use common_datasource::compression::CompressionType; -use common_meta::cache::{new_schema_cache, new_table_info_cache, new_table_schema_cache}; -use common_meta::key::schema_name::{SchemaName, SchemaNameValue}; +use common_meta::cache::{new_schema_cache, new_table_schema_cache}; use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef}; use common_meta::kv_backend::memory::MemoryKvBackend; use common_meta::kv_backend::KvBackendRef; @@ -49,7 +48,7 @@ use datatypes::schema::ColumnSchema; use log_store::kafka::log_store::KafkaLogStore; use log_store::raft_engine::log_store::RaftEngineLogStore; use log_store::test_util::log_store_util; -use moka::future::{Cache, CacheBuilder}; +use moka::future::CacheBuilder; use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef}; use object_store::services::Fs; use object_store::ObjectStore; diff --git a/src/mito2/src/test_util/memtable_util.rs b/src/mito2/src/test_util/memtable_util.rs index f1cc57aa3b..1a0eacecf8 100644 --- a/src/mito2/src/test_util/memtable_util.rs +++ b/src/mito2/src/test_util/memtable_util.rs @@ -35,7 +35,7 @@ use crate::memtable::key_values::KeyValue; use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBatch, DataBuffer}; use crate::memtable::{ BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRange, - MemtableRef, MemtableStats, + MemtableRanges, MemtableRef, MemtableStats, }; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; @@ -93,8 +93,8 @@ impl Memtable for EmptyMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - ) -> BTreeMap { - BTreeMap::new() + ) -> MemtableRanges { + MemtableRanges::default() } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs index 67295d6a64..8eee93966b 100644 --- a/src/mito2/src/worker.rs +++ b/src/mito2/src/worker.rs @@ -171,6 +171,8 @@ impl WorkerGroup { .selector_result_cache_size(config.selector_result_cache_size.as_bytes()) .index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes()) .index_content_size(config.inverted_index.content_cache_size.as_bytes()) + .index_content_page_size(config.inverted_index.content_cache_page_size.as_bytes()) + .puffin_metadata_size(config.index.metadata_cache_size.as_bytes()) .write_cache(write_cache) .build(), ); diff --git a/src/mito2/src/worker/handle_catchup.rs b/src/mito2/src/worker/handle_catchup.rs index f0fd6b0550..8992621dd7 100644 --- a/src/mito2/src/worker/handle_catchup.rs +++ b/src/mito2/src/worker/handle_catchup.rs @@ -16,8 +16,8 @@ use std::sync::Arc; -use common_telemetry::info; use common_telemetry::tracing::warn; +use common_telemetry::{debug, info}; use snafu::ensure; use store_api::logstore::LogStore; use store_api::region_engine::RegionRole; @@ -40,6 +40,7 @@ impl RegionWorkerLoop { }; if region.is_writable() { + debug!("Region {region_id} is writable, skip catchup"); return Ok(0); } // Note: Currently, We protect the split brain by ensuring the mutable table is empty. 
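The remaining hunks largely track the opendal 0.49 → 0.50 upgrade: `Operator::is_exist` is replaced by `Operator::exists`, and the hand-rolled Prometheus layer gives way to opendal's built-in `PrometheusLayer`. A minimal sketch of the renamed existence check, assuming the crate's `join_path` helper and `DROPPING_MARKER_FILE` constant as used in the hunk that follows:

async fn region_has_dropping_marker(object_store: &ObjectStore, region_dir: &str) -> bool {
    // Same shape as the pre-0.50 call; only `is_exist` -> `exists` changes.
    object_store
        .exists(&join_path(region_dir, DROPPING_MARKER_FILE))
        .await
        .unwrap_or(false)
}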
diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs index d4a13a1345..01eaf17652 100644 --- a/src/mito2/src/worker/handle_open.rs +++ b/src/mito2/src/worker/handle_open.rs @@ -51,7 +51,7 @@ impl RegionWorkerLoop { // Check if this region is pending drop. And clean the entire dir if so. if !self.dropping_regions.is_region_exists(region_id) && object_store - .is_exist(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) + .exists(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) .await .context(OpenDalSnafu)? { diff --git a/src/object-store/Cargo.toml b/src/object-store/Cargo.toml index 72e0e2bfbe..b82be7376a 100644 --- a/src/object-store/Cargo.toml +++ b/src/object-store/Cargo.toml @@ -17,8 +17,9 @@ futures.workspace = true lazy_static.workspace = true md5 = "0.7" moka = { workspace = true, features = ["future"] } -opendal = { version = "0.49", features = [ +opendal = { version = "0.50", features = [ "layers-tracing", + "layers-prometheus", "services-azblob", "services-fs", "services-gcs", diff --git a/src/object-store/src/layers.rs b/src/object-store/src/layers.rs index b2145aa6b0..20108ab63c 100644 --- a/src/object-store/src/layers.rs +++ b/src/object-store/src/layers.rs @@ -13,8 +13,37 @@ // limitations under the License. mod lru_cache; -mod prometheus; pub use lru_cache::*; pub use opendal::layers::*; -pub use prometheus::PrometheusMetricsLayer; +pub use prometheus::build_prometheus_metrics_layer; + +mod prometheus { + use std::sync::{Mutex, OnceLock}; + + use opendal::layers::PrometheusLayer; + + static PROMETHEUS_LAYER: OnceLock> = OnceLock::new(); + + pub fn build_prometheus_metrics_layer(with_path_label: bool) -> PrometheusLayer { + PROMETHEUS_LAYER + .get_or_init(|| { + // This logical tries to extract parent path from the object storage operation + // the function also relies on assumption that the region path is built from + // pattern `/catalog/schema/table_id/....` + // + // We'll get the data/catalog/schema from path. + let path_level = if with_path_label { 3 } else { 0 }; + + let layer = PrometheusLayer::builder() + .path_label(path_level) + .register_default() + .unwrap(); + + Mutex::new(layer) + }) + .lock() + .unwrap() + .clone() + } +} diff --git a/src/object-store/src/layers/lru_cache/read_cache.rs b/src/object-store/src/layers/lru_cache/read_cache.rs index f88b36784d..874b17280d 100644 --- a/src/object-store/src/layers/lru_cache/read_cache.rs +++ b/src/object-store/src/layers/lru_cache/read_cache.rs @@ -156,9 +156,12 @@ impl ReadCache { let size = entry.metadata().content_length(); OBJECT_STORE_LRU_CACHE_ENTRIES.inc(); OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64); - self.mem_cache - .insert(read_key.to_string(), ReadResult::Success(size as u32)) - .await; + // ignore root path + if entry.path() != "/" { + self.mem_cache + .insert(read_key.to_string(), ReadResult::Success(size as u32)) + .await; + } } Ok(self.cache_stat().await) diff --git a/src/object-store/src/layers/prometheus.rs b/src/object-store/src/layers/prometheus.rs deleted file mode 100644 index fef83a9146..0000000000 --- a/src/object-store/src/layers/prometheus.rs +++ /dev/null @@ -1,584 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! code originally from , make a tiny change to avoid crash in multi thread env - -use std::fmt::{Debug, Formatter}; - -use common_telemetry::debug; -use lazy_static::lazy_static; -use opendal::raw::*; -use opendal::{Buffer, ErrorKind}; -use prometheus::{ - exponential_buckets, histogram_opts, register_histogram_vec, register_int_counter_vec, - Histogram, HistogramTimer, HistogramVec, IntCounterVec, -}; - -use crate::util::extract_parent_path; - -type Result = std::result::Result; - -lazy_static! { - static ref REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( - "opendal_requests_total", - "Total times of all kinds of operation being called", - &["scheme", "operation", "path"], - ) - .unwrap(); - static ref REQUESTS_DURATION_SECONDS: HistogramVec = register_histogram_vec!( - histogram_opts!( - "opendal_requests_duration_seconds", - "Histogram of the time spent on specific operation", - exponential_buckets(0.01, 2.0, 16).unwrap() - ), - &["scheme", "operation", "path"] - ) - .unwrap(); - static ref BYTES_TOTAL: HistogramVec = register_histogram_vec!( - histogram_opts!( - "opendal_bytes_total", - "Total size of sync or async Read/Write", - exponential_buckets(0.01, 2.0, 16).unwrap() - ), - &["scheme", "operation", "path"] - ) - .unwrap(); -} - -#[inline] -fn increment_errors_total(op: Operation, kind: ErrorKind) { - debug!( - "Prometheus statistics metrics error, operation {} error {}", - op.into_static(), - kind.into_static() - ); -} - -/// Please refer to [prometheus](https://docs.rs/prometheus) for every operation. -/// -/// # Prometheus Metrics -/// -/// In this section, we will introduce three metrics that are currently being exported by opendal. These metrics are essential for understanding the behavior and performance of opendal. -/// -/// -/// | Metric Name | Type | Description | Labels | -/// |-----------------------------------|-----------|------------------------------------------------------|---------------------| -/// | opendal_requests_total | Counter | Total times of all kinds of operation being called | scheme, operation | -/// | opendal_requests_duration_seconds | Histogram | Histogram of the time spent on specific operation | scheme, operation | -/// | opendal_bytes_total | Histogram | Total size of sync or async Read/Write | scheme, operation | -/// -/// For a more detailed explanation of these metrics and how they are used, please refer to the [Prometheus documentation](https://prometheus.io/docs/introduction/overview/). -/// -/// # Histogram Configuration -/// -/// The metric buckets for these histograms are automatically generated based on the `exponential_buckets(0.01, 2.0, 16)` configuration. 
-#[derive(Default, Debug, Clone)] -pub struct PrometheusMetricsLayer { - pub path_label: bool, -} - -impl PrometheusMetricsLayer { - pub fn new(path_label: bool) -> Self { - Self { path_label } - } -} - -impl Layer for PrometheusMetricsLayer { - type LayeredAccess = PrometheusAccess; - - fn layer(&self, inner: A) -> Self::LayeredAccess { - let meta = inner.info(); - let scheme = meta.scheme(); - - PrometheusAccess { - inner, - scheme: scheme.to_string(), - path_label: self.path_label, - } - } -} - -#[derive(Clone)] -pub struct PrometheusAccess { - inner: A, - scheme: String, - path_label: bool, -} - -impl PrometheusAccess { - fn get_path_label<'a>(&self, path: &'a str) -> &'a str { - if self.path_label { - extract_parent_path(path) - } else { - "" - } - } -} - -impl Debug for PrometheusAccess { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PrometheusAccessor") - .field("inner", &self.inner) - .finish_non_exhaustive() - } -} - -impl LayeredAccess for PrometheusAccess { - type Inner = A; - type Reader = PrometheusMetricWrapper; - type BlockingReader = PrometheusMetricWrapper; - type Writer = PrometheusMetricWrapper; - type BlockingWriter = PrometheusMetricWrapper; - type Lister = A::Lister; - type BlockingLister = A::BlockingLister; - - fn inner(&self) -> &Self::Inner { - &self.inner - } - - async fn create_dir(&self, path: &str, args: OpCreateDir) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) - .start_timer(); - let create_res = self.inner.create_dir(path, args).await; - - timer.observe_duration(); - create_res.inspect_err(|e| { - increment_errors_total(Operation::CreateDir, e.kind()); - }) - } - - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) - .start_timer(); - - let (rp, r) = self.inner.read(path, args).await.inspect_err(|e| { - increment_errors_total(Operation::Read, e.kind()); - })?; - - Ok(( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Read, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::Read.into_static(), - path_label, - ]), - timer, - ), - )) - } - - async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) - .start_timer(); - - let (rp, r) = self.inner.write(path, args).await.inspect_err(|e| { - increment_errors_total(Operation::Write, e.kind()); - })?; - - Ok(( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Write, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::Write.into_static(), - path_label, - ]), - timer, - ), - )) - } - - async fn stat(&self, path: &str, args: OpStat) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) - .inc(); - 
let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) - .start_timer(); - - let stat_res = self.inner.stat(path, args).await; - timer.observe_duration(); - stat_res.inspect_err(|e| { - increment_errors_total(Operation::Stat, e.kind()); - }) - } - - async fn delete(&self, path: &str, args: OpDelete) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) - .start_timer(); - - let delete_res = self.inner.delete(path, args).await; - timer.observe_duration(); - delete_res.inspect_err(|e| { - increment_errors_total(Operation::Delete, e.kind()); - }) - } - - async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) - .start_timer(); - - let list_res = self.inner.list(path, args).await; - - timer.observe_duration(); - list_res.inspect_err(|e| { - increment_errors_total(Operation::List, e.kind()); - }) - } - - async fn batch(&self, args: OpBatch) -> Result { - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) - .start_timer(); - let result = self.inner.batch(args).await; - - timer.observe_duration(); - result.inspect_err(|e| { - increment_errors_total(Operation::Batch, e.kind()); - }) - } - - async fn presign(&self, path: &str, args: OpPresign) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) - .start_timer(); - let result = self.inner.presign(path, args).await; - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::Presign, e.kind()); - }) - } - - fn blocking_create_dir(&self, path: &str, args: OpCreateDir) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingCreateDir.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingCreateDir.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_create_dir(path, args); - - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingCreateDir, e.kind()); - }) - } - - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]) - .start_timer(); - - self.inner - .blocking_read(path, args) - .map(|(rp, r)| { - ( - rp, - 
PrometheusMetricWrapper::new( - r, - Operation::BlockingRead, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]), - timer, - ), - ) - }) - .inspect_err(|e| { - increment_errors_total(Operation::BlockingRead, e.kind()); - }) - } - - fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]) - .start_timer(); - - self.inner - .blocking_write(path, args) - .map(|(rp, r)| { - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::BlockingWrite, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]), - timer, - ), - ) - }) - .inspect_err(|e| { - increment_errors_total(Operation::BlockingWrite, e.kind()); - }) - } - - fn blocking_stat(&self, path: &str, args: OpStat) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingStat.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingStat.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_stat(path, args); - timer.observe_duration(); - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingStat, e.kind()); - }) - } - - fn blocking_delete(&self, path: &str, args: OpDelete) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingDelete.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingDelete.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_delete(path, args); - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingDelete, e.kind()); - }) - } - - fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingList.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingList.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_list(path, args); - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingList, e.kind()); - }) - } -} - -pub struct PrometheusMetricWrapper { - inner: R, - - op: Operation, - bytes_counter: Histogram, - _requests_duration_timer: HistogramTimer, - bytes: u64, -} - -impl Drop for PrometheusMetricWrapper { - fn drop(&mut self) { - self.bytes_counter.observe(self.bytes as f64); - } -} - -impl PrometheusMetricWrapper { - fn new( - inner: R, - op: Operation, - bytes_counter: Histogram, - requests_duration_timer: HistogramTimer, - ) -> Self { - Self { - inner, - op, - bytes_counter, - _requests_duration_timer: requests_duration_timer, - bytes: 0, - } - } -} - -impl oio::Read for PrometheusMetricWrapper { - async fn read(&mut self) -> Result { - 
self.inner.read().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::BlockingRead for PrometheusMetricWrapper { - fn read(&mut self) -> opendal::Result { - self.inner.read().inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::Write for PrometheusMetricWrapper { - async fn write(&mut self, bs: Buffer) -> Result<()> { - let bytes = bs.len(); - match self.inner.write(bs).await { - Ok(_) => { - self.bytes += bytes as u64; - Ok(()) - } - Err(err) => { - increment_errors_total(self.op, err.kind()); - Err(err) - } - } - } - - async fn close(&mut self) -> Result<()> { - self.inner.close().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } - - async fn abort(&mut self) -> Result<()> { - self.inner.close().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::BlockingWrite for PrometheusMetricWrapper { - fn write(&mut self, bs: Buffer) -> Result<()> { - let bytes = bs.len(); - self.inner - .write(bs) - .map(|_| { - self.bytes += bytes as u64; - }) - .inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } - - fn close(&mut self) -> Result<()> { - self.inner.close().inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} diff --git a/src/object-store/src/util.rs b/src/object-store/src/util.rs index fc0a031ab9..271da33e85 100644 --- a/src/object-store/src/util.rs +++ b/src/object-store/src/util.rs @@ -15,19 +15,12 @@ use std::fmt::Display; use common_telemetry::{debug, error, trace}; -use futures::TryStreamExt; use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer}; use opendal::raw::{AccessorInfo, Operation}; -use opendal::{Entry, ErrorKind, Lister}; +use opendal::ErrorKind; -use crate::layers::PrometheusMetricsLayer; use crate::ObjectStore; -/// Collect all entries from the [Lister]. -pub async fn collect(stream: Lister) -> Result, opendal::Error> { - stream.try_collect::>().await -} - /// Join two paths and normalize the output dir. /// /// The output dir is always ends with `/`. e.g. @@ -127,26 +120,12 @@ pub fn normalize_path(path: &str) -> String { p } -// This logical tries to extract parent path from the object storage operation -// the function also relies on assumption that the region path is built from -// pattern `/catalog/schema/table_id/....` -// -// this implementation tries to extract at most 3 levels of parent path -pub(crate) fn extract_parent_path(path: &str) -> &str { - // split the path into `catalog`, `schema` and others - path.char_indices() - .filter(|&(_, c)| c == '/') - // we get the data/catalog/schema from path, split at the 3rd / - .nth(2) - .map_or(path, |(i, _)| &path[..i]) -} - /// Attaches instrument layers to the object store. 
pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore { object_store .layer(LoggingLayer::new(DefaultLoggingInterceptor)) .layer(TracingLayer) - .layer(PrometheusMetricsLayer::new(path_label)) + .layer(crate::layers::build_prometheus_metrics_layer(path_label)) } static LOGGING_TARGET: &str = "opendal::services"; @@ -263,28 +242,4 @@ mod tests { assert_eq!("/abc", join_path("//", "/abc")); assert_eq!("abc/def", join_path("abc/", "//def")); } - - #[test] - fn test_path_extraction() { - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public/1024/1024_0000000000/") - ); - - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public/1/") - ); - - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public") - ); - - assert_eq!("data/greptime/", extract_parent_path("data/greptime/")); - - assert_eq!("data/", extract_parent_path("data/")); - - assert_eq!("/", extract_parent_path("/")); - } } diff --git a/src/object-store/tests/object_store_test.rs b/src/object-store/tests/object_store_test.rs index 497decffab..7e81b965fb 100644 --- a/src/object-store/tests/object_store_test.rs +++ b/src/object-store/tests/object_store_test.rs @@ -65,23 +65,38 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> { store.write(p3, "Hello, object3!").await?; // List objects - let entries = store.list("/").await?; + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert_eq!(3, entries.len()); store.delete(p1).await?; store.delete(p3).await?; // List objects again - // Only o2 is exists - let entries = store.list("/").await?; + // Only o2 and root exist + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert_eq!(1, entries.len()); - assert_eq!(p2, entries.first().unwrap().path()); + assert_eq!(p2, entries[0].path()); let content = store.read(p2).await?; assert_eq!("Hello, object2!", String::from_utf8(content.to_vec())?); store.delete(p2).await?; - let entries = store.list("/").await?; + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert!(entries.is_empty()); assert!(store.read(p1).await.is_err()); @@ -252,7 +267,7 @@ async fn test_file_backend_with_lru_cache() -> Result<()> { async fn assert_lru_cache(cache_layer: &LruCacheLayer, file_names: &[&str]) { for file_name in file_names { - assert!(cache_layer.contains_file(file_name).await); + assert!(cache_layer.contains_file(file_name).await, "{file_name}"); } } @@ -264,7 +279,9 @@ async fn assert_cache_files( let (_, mut lister) = store.list("/", OpList::default()).await?; let mut objects = vec![]; while let Some(e) = lister.next().await? 
{ - objects.push(e); + if e.mode() == EntryMode::FILE { + objects.push(e); + } } // compare the cache file with the expected cache file; ignore orders @@ -332,9 +349,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", ], &["Hello, object1!", "object2!", "Hello, object2!"], ) @@ -342,9 +359,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", ], ) .await; @@ -355,13 +372,13 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_eq!(cache_layer.read_cache_stat().await, (1, 15)); assert_cache_files( &cache_store, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], &["Hello, object1!"], ) .await?; assert_lru_cache( &cache_layer, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], ) .await; @@ -388,8 +405,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["Hello, object1!", "Hello, object3!", "Hello"], @@ -398,8 +415,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -416,7 +433,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_store, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["ello, object1!", "Hello, object3!", "Hello"], @@ -426,7 +443,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -448,7 +465,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index b3251ca6bf..ad842a40fe 100644 --- a/src/operator/src/statement.rs +++ 
b/src/operator/src/statement.rs @@ -59,6 +59,7 @@ use set::set_query_timeout; use snafu::{ensure, OptionExt, ResultExt}; use sql::statements::copy::{CopyDatabase, CopyDatabaseArgument, CopyTable, CopyTableArgument}; use sql::statements::set_variables::SetVariables; +use sql::statements::show::ShowCreateTableVariant; use sql::statements::statement::Statement; use sql::statements::OptionMap; use sql::util::format_raw_object_name; @@ -317,8 +318,16 @@ impl StatementExecutor { .context(TableNotFoundSnafu { table_name: &table })?; let table_name = TableName::new(catalog, schema, table); - self.show_create_table(table_name, table_ref, query_ctx) - .await + match show.variant { + ShowCreateTableVariant::Original => { + self.show_create_table(table_name, table_ref, query_ctx) + .await + } + ShowCreateTableVariant::PostgresForeignTable => { + self.show_create_table_for_pg(table_name, table_ref, query_ctx) + .await + } + } } Statement::ShowCreateFlow(show) => self.show_create_flow(show, query_ctx).await, Statement::ShowCreateView(show) => self.show_create_view(show, query_ctx).await, diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index ed96ca6f18..eba88ee44d 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -271,7 +271,8 @@ impl StatementExecutor { table_info.ident.table_id = table_id; - let table_info = Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); + let table_info: Arc = + Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); create_table.table_id = Some(api::v1::TableId { id: table_id }); let table = DistTable::table(table_info); diff --git a/src/operator/src/statement/show.rs b/src/operator/src/statement/show.rs index 210ec4e7f2..fe91c71abe 100644 --- a/src/operator/src/statement/show.rs +++ b/src/operator/src/statement/show.rs @@ -144,6 +144,26 @@ impl StatementExecutor { .context(ExecuteStatementSnafu) } + #[tracing::instrument(skip_all)] + pub async fn show_create_table_for_pg( + &self, + table_name: TableName, + table: TableRef, + query_ctx: QueryContextRef, + ) -> Result { + let table_info = table.table_info(); + if table_info.table_type != TableType::Base { + return error::ShowCreateTableBaseOnlySnafu { + table_name: table_name.to_string(), + table_type: table_info.table_type, + } + .fail(); + } + + query::sql::show_create_foreign_table_for_pg(table, query_ctx) + .context(ExecuteStatementSnafu) + } + #[tracing::instrument(skip_all)] pub async fn show_create_view( &self, diff --git a/src/pipeline/Cargo.toml b/src/pipeline/Cargo.toml index 4657f39a68..9c26d1a52f 100644 --- a/src/pipeline/Cargo.toml +++ b/src/pipeline/Cargo.toml @@ -63,7 +63,6 @@ yaml-rust = "0.4" catalog = { workspace = true, features = ["testing"] } criterion = { version = "0.4", features = ["html_reports"] } rayon = "1.0" -ron = "0.7" serde = { version = "1.0", features = ["derive"] } session = { workspace = true, features = ["testing"] } diff --git a/src/pipeline/benches/processor.rs b/src/pipeline/benches/processor.rs index 09462753d8..8cf221af5b 100644 --- a/src/pipeline/benches/processor.rs +++ b/src/pipeline/benches/processor.rs @@ -223,7 +223,7 @@ transform: type: uint32 "#; - parse(&Content::Yaml(pipeline_yaml.into())).unwrap() + parse(&Content::Yaml(pipeline_yaml)).unwrap() } fn criterion_benchmark(c: &mut Criterion) { diff --git a/src/pipeline/src/etl.rs b/src/pipeline/src/etl.rs index 9bd47a899e..45feb4b02f 100644 --- a/src/pipeline/src/etl.rs +++ b/src/pipeline/src/etl.rs @@ -37,9 +37,9 @@ const PROCESSORS: 
&str = "processors"; const TRANSFORM: &str = "transform"; const TRANSFORMS: &str = "transforms"; -pub enum Content { - Json(String), - Yaml(String), +pub enum Content<'a> { + Json(&'a str), + Yaml(&'a str), } pub fn parse(input: &Content) -> Result> @@ -379,8 +379,7 @@ transform: - field: field2 type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut payload).unwrap(); assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); @@ -432,8 +431,7 @@ transform: - field: ts type: timestamp, ns index: time"#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_str.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_str)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline .prepare(serde_json::Value::String(message), &mut payload) @@ -509,8 +507,7 @@ transform: type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut payload).unwrap(); assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); @@ -554,8 +551,7 @@ transform: index: time "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let schema = pipeline.schemas().clone(); let mut result = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut result).unwrap(); diff --git a/src/pipeline/src/etl/transform/transformer/greptime.rs b/src/pipeline/src/etl/transform/transformer/greptime.rs index 3b43696b5a..5d69a03ea2 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime.rs @@ -15,6 +15,7 @@ pub mod coerce; use std::collections::HashSet; +use std::sync::Arc; use ahash::HashMap; use api::helper::proto_value_type; @@ -367,20 +368,15 @@ fn json_value_to_row( Ok(Row { values: row }) } -/// Identity pipeline for Greptime -/// This pipeline will convert the input JSON array to Greptime Rows -/// 1. The pipeline will add a default timestamp column to the schema -/// 2. The pipeline not resolve NULL value -/// 3. The pipeline assumes that the json format is fixed -/// 4. The pipeline will return an error if the same column datatype is mismatched -/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema. 
-pub fn identity_pipeline(array: Vec) -> Result { +fn identity_pipeline_inner<'a>( + array: Vec, + tag_column_names: Option>, +) -> Result { let mut rows = Vec::with_capacity(array.len()); - - let mut schema = SchemaInfo::default(); + let mut schema_info = SchemaInfo::default(); for value in array { if let serde_json::Value::Object(map) = value { - let row = json_value_to_row(&mut schema, map)?; + let row = json_value_to_row(&mut schema_info, map)?; rows.push(row); } } @@ -395,7 +391,7 @@ pub fn identity_pipeline(array: Vec) -> Result { let ts = GreptimeValue { value_data: Some(ValueData::TimestampNanosecondValue(ns)), }; - let column_count = schema.schema.len(); + let column_count = schema_info.schema.len(); for row in rows.iter_mut() { let diff = column_count - row.values.len(); for _ in 0..diff { @@ -403,15 +399,49 @@ pub fn identity_pipeline(array: Vec) -> Result { } row.values.push(ts.clone()); } - schema.schema.push(greptime_timestamp_schema); + schema_info.schema.push(greptime_timestamp_schema); + + // set the semantic type of the row key column to Tag + if let Some(tag_column_names) = tag_column_names { + tag_column_names.for_each(|tag_column_name| { + if let Some(index) = schema_info.index.get(tag_column_name) { + schema_info.schema[*index].semantic_type = SemanticType::Tag as i32; + } + }); + } Ok(Rows { - schema: schema.schema, + schema: schema_info.schema, rows, }) } +/// Identity pipeline for Greptime +/// This pipeline will convert the input JSON array to Greptime Rows +/// params table is used to set the semantic type of the row key column to Tag +/// 1. The pipeline will add a default timestamp column to the schema +/// 2. The pipeline not resolve NULL value +/// 3. The pipeline assumes that the json format is fixed +/// 4. The pipeline will return an error if the same column datatype is mismatched +/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema. 
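// A hedged usage sketch of the function declared just below; everything except
// `identity_pipeline` itself (the helper name, the sample records, the expected
// column count) is illustrative. The pipeline merges the schemas of all records,
// pads shorter rows, appends the default timestamp column, and, when a table is
// passed, promotes that table's row-key columns to the Tag semantic type.
fn identity_pipeline_usage_sketch() {
    let array = vec![
        serde_json::json!({"host": "h1", "cpu": 0.5}),
        serde_json::json!({"host": "h2", "cpu": 0.7, "mem_gb": 8}),
    ];
    // `None` keeps every inferred column a Field; pass `Some(table)` to have the
    // table's row-key columns tagged, as in the new code path.
    let rows = identity_pipeline(array, None).expect("records have compatible types");
    // host + cpu + mem_gb + the appended timestamp column.
    assert_eq!(rows.schema.len(), 4);
    assert_eq!(rows.rows[0].values.len(), 4);
}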
+pub fn identity_pipeline( + array: Vec, + table: Option>, +) -> Result { + match table { + Some(table) => { + let table_info = table.table_info(); + let tag_column_names = table_info.meta.row_key_column_names(); + identity_pipeline_inner(array, Some(tag_column_names)) + } + None => identity_pipeline_inner(array, None::>), + } +} + #[cfg(test)] mod tests { + use api::v1::SemanticType; + + use crate::etl::transform::transformer::greptime::identity_pipeline_inner; use crate::identity_pipeline; #[test] @@ -437,7 +467,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_err()); assert_eq!( rows.err().unwrap().to_string(), @@ -465,7 +495,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_err()); assert_eq!( rows.err().unwrap().to_string(), @@ -493,7 +523,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_ok()); let rows = rows.unwrap(); assert_eq!(rows.schema.len(), 8); @@ -501,5 +531,58 @@ mod tests { assert_eq!(8, rows.rows[0].values.len()); assert_eq!(8, rows.rows[1].values.len()); } + { + let array = vec![ + serde_json::json!({ + "woshinull": null, + "name": "Alice", + "age": 20, + "is_student": true, + "score": 99.5, + "hobbies": "reading", + "address": "Beijing", + }), + serde_json::json!({ + "name": "Bob", + "age": 21, + "is_student": false, + "score": 88.5, + "hobbies": "swimming", + "address": "Shanghai", + "gaga": "gaga" + }), + ]; + let tag_column_names = ["name".to_string(), "address".to_string()]; + let rows = identity_pipeline_inner(array, Some(tag_column_names.iter())); + assert!(rows.is_ok()); + let rows = rows.unwrap(); + assert_eq!(rows.schema.len(), 8); + assert_eq!(rows.rows.len(), 2); + assert_eq!(8, rows.rows[0].values.len()); + assert_eq!(8, rows.rows[1].values.len()); + assert_eq!( + rows.schema + .iter() + .find(|x| x.column_name == "name") + .unwrap() + .semantic_type, + SemanticType::Tag as i32 + ); + assert_eq!( + rows.schema + .iter() + .find(|x| x.column_name == "address") + .unwrap() + .semantic_type, + SemanticType::Tag as i32 + ); + assert_eq!( + rows.schema + .iter() + .filter(|x| x.semantic_type == SemanticType::Tag as i32) + .count(), + 2 + ); + } } } diff --git a/src/pipeline/src/manager/pipeline_operator.rs b/src/pipeline/src/manager/pipeline_operator.rs index 2e838144a4..4f43b89e2e 100644 --- a/src/pipeline/src/manager/pipeline_operator.rs +++ b/src/pipeline/src/manager/pipeline_operator.rs @@ -243,4 +243,9 @@ impl PipelineOperator { }) .await } + + /// Compile a pipeline. + pub fn build_pipeline(pipeline: &str) -> Result> { + PipelineTable::compile_pipeline(pipeline) + } } diff --git a/src/pipeline/src/manager/table.rs b/src/pipeline/src/manager/table.rs index 7b3719b667..c2a36c63ec 100644 --- a/src/pipeline/src/manager/table.rs +++ b/src/pipeline/src/manager/table.rs @@ -203,7 +203,7 @@ impl PipelineTable { /// Compile a pipeline from a string. 
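// A small sketch of the `Content<'a>` change that the call-site updates below rely
// on: `Content::Yaml` now borrows the pipeline text, so compiling no longer clones
// the YAML string. The helper name is illustrative; `parse`, `Content`, `Pipeline`
// and `GreptimeTransformer` are the items already used throughout this patch.
fn compile_sketch(yaml: &str) -> Pipeline<GreptimeTransformer> {
    let content = Content::Yaml(yaml);
    parse::<GreptimeTransformer>(&content).expect("valid pipeline yaml")
}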
pub fn compile_pipeline(pipeline: &str) -> Result> { - let yaml_content = Content::Yaml(pipeline.into()); + let yaml_content = Content::Yaml(pipeline); parse::(&yaml_content).context(CompilePipelineSnafu) } diff --git a/src/pipeline/tests/common.rs b/src/pipeline/tests/common.rs index aa96d14d55..d825c91e4c 100644 --- a/src/pipeline/tests/common.rs +++ b/src/pipeline/tests/common.rs @@ -19,7 +19,7 @@ use pipeline::{parse, Content, GreptimeTransformer, Pipeline}; pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows { let input_value = serde_json::from_str::(input_str).unwrap(); - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); let mut result = pipeline.init_intermediate_state(); diff --git a/src/pipeline/tests/dissect.rs b/src/pipeline/tests/dissect.rs index 7577d58080..56386d0e86 100644 --- a/src/pipeline/tests/dissect.rs +++ b/src/pipeline/tests/dissect.rs @@ -270,7 +270,7 @@ transform: let input_value = serde_json::from_str::(input_str).unwrap(); - let yaml_content = pipeline::Content::Yaml(pipeline_yaml.into()); + let yaml_content = pipeline::Content::Yaml(pipeline_yaml); let pipeline: pipeline::Pipeline = pipeline::parse(&yaml_content).expect("failed to parse pipeline"); let mut result = pipeline.init_intermediate_state(); diff --git a/src/pipeline/tests/pipeline.rs b/src/pipeline/tests/pipeline.rs index e68c7b9e6a..de724e1a27 100644 --- a/src/pipeline/tests/pipeline.rs +++ b/src/pipeline/tests/pipeline.rs @@ -417,7 +417,7 @@ transform: .map(|(_, d)| GreptimeValue { value_data: d }) .collect::>(); - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); let mut stats = pipeline.init_intermediate_state(); @@ -487,7 +487,7 @@ transform: type: json "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -592,7 +592,7 @@ transform: type: json "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -655,7 +655,7 @@ transform: index: timestamp "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -691,7 +691,7 @@ transform: - message type: string "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index 4039328528..7b51651a7c 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -22,11 +22,8 @@ futures = "0.3" greptime-proto.workspace = true lazy_static.workspace = true prometheus.workspace = true -promql-parser.workspace = true prost.workspace = true snafu.workspace = true [dev-dependencies] -query.workspace = true -session = { workspace = true, features = ["testing"] } tokio.workspace = true diff --git a/src/puffin/Cargo.toml b/src/puffin/Cargo.toml index 
e4e6c74a5c..31c92ba4f9 100644 --- a/src/puffin/Cargo.toml +++ b/src/puffin/Cargo.toml @@ -25,6 +25,7 @@ futures.workspace = true lz4_flex = "0.11" moka = { workspace = true, features = ["future", "sync"] } pin-project.workspace = true +prometheus.workspace = true serde.workspace = true serde_json.workspace = true sha2 = "0.10.8" diff --git a/src/puffin/src/blob_metadata.rs b/src/puffin/src/blob_metadata.rs index bb2475bfa3..67eb62c5ff 100644 --- a/src/puffin/src/blob_metadata.rs +++ b/src/puffin/src/blob_metadata.rs @@ -68,6 +68,20 @@ pub struct BlobMetadata { pub properties: HashMap, } +impl BlobMetadata { + /// Calculates the memory usage of the blob metadata in bytes. + pub fn memory_usage(&self) -> usize { + self.blob_type.len() + + self.input_fields.len() * std::mem::size_of::() + + self + .properties + .iter() + .map(|(k, v)| k.len() + v.len()) + .sum::() + + std::mem::size_of::() + } +} + /// Compression codec used to compress the blob #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] diff --git a/src/puffin/src/error.rs b/src/puffin/src/error.rs index 57aec44d1f..634ede5b13 100644 --- a/src/puffin/src/error.rs +++ b/src/puffin/src/error.rs @@ -25,14 +25,6 @@ use snafu::{Location, Snafu}; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Failed to seek"))] - Seek { - #[snafu(source)] - error: IoError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to read"))] Read { #[snafu(source)] @@ -119,14 +111,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to convert bytes to integer"))] - BytesToInteger { - #[snafu(source)] - error: std::array::TryFromSliceError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unsupported decompression: {}", decompression))] UnsupportedDecompression { decompression: String, @@ -150,17 +134,15 @@ pub enum Error { location: Location, }, - #[snafu(display("Parse stage not match, expected: {}, actual: {}", expected, actual))] - ParseStageNotMatch { - expected: String, - actual: String, + #[snafu(display("Unexpected footer payload size: {}", size))] + UnexpectedFooterPayloadSize { + size: i32, #[snafu(implicit)] location: Location, }, - #[snafu(display("Unexpected footer payload size: {}", size))] - UnexpectedFooterPayloadSize { - size: i32, + #[snafu(display("Invalid puffin footer"))] + InvalidPuffinFooter { #[snafu(implicit)] location: Location, }, @@ -177,20 +159,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid blob offset: {}, location: {:?}", offset, location))] - InvalidBlobOffset { - offset: i64, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid blob area end: {}, location: {:?}", offset, location))] - InvalidBlobAreaEnd { - offset: u64, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to compress lz4"))] Lz4Compression { #[snafu(source)] @@ -262,8 +230,7 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - Seek { .. } - | Read { .. } + Read { .. } | MagicNotMatched { .. } | DeserializeJson { .. } | Write { .. } @@ -275,18 +242,15 @@ impl ErrorExt for Error { | Remove { .. } | Rename { .. } | SerializeJson { .. } - | BytesToInteger { .. } - | ParseStageNotMatch { .. } | UnexpectedFooterPayloadSize { .. } | UnexpectedPuffinFileSize { .. } - | InvalidBlobOffset { .. } - | InvalidBlobAreaEnd { .. } | Lz4Compression { .. } | Lz4Decompression { .. } | BlobNotFound { .. 
} | BlobIndexOutOfBound { .. } | FileKeyNotMatch { .. } - | WalkDir { .. } => StatusCode::Unexpected, + | WalkDir { .. } + | InvalidPuffinFooter { .. } => StatusCode::Unexpected, UnsupportedCompression { .. } | UnsupportedDecompression { .. } => { StatusCode::Unsupported diff --git a/src/puffin/src/file_format/reader.rs b/src/puffin/src/file_format/reader.rs index 3f48bf4b10..162d7116a5 100644 --- a/src/puffin/src/file_format/reader.rs +++ b/src/puffin/src/file_format/reader.rs @@ -21,21 +21,9 @@ use common_base::range_read::RangeReader; use crate::blob_metadata::BlobMetadata; use crate::error::Result; pub use crate::file_format::reader::file::PuffinFileReader; +pub use crate::file_format::reader::footer::PuffinFileFooterReader; use crate::file_metadata::FileMetadata; -/// `SyncReader` defines a synchronous reader for puffin data. -pub trait SyncReader<'a> { - type Reader: std::io::Read + std::io::Seek; - - /// Fetches the FileMetadata. - fn metadata(&'a mut self) -> Result; - - /// Reads particular blob data based on given metadata. - /// - /// Data read from the reader is compressed leaving the caller to decompress the data. - fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result; -} - /// `AsyncReader` defines an asynchronous reader for puffin data. #[async_trait] pub trait AsyncReader<'a> { diff --git a/src/puffin/src/file_format/reader/file.rs b/src/puffin/src/file_format/reader/file.rs index 3736ed5d2d..9ed40a7f18 100644 --- a/src/puffin/src/file_format/reader/file.rs +++ b/src/puffin/src/file_format/reader/file.rs @@ -12,20 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::{self, SeekFrom}; - use async_trait::async_trait; use common_base::range_read::RangeReader; use snafu::{ensure, ResultExt}; use crate::blob_metadata::BlobMetadata; -use crate::error::{ - MagicNotMatchedSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedPuffinFileSizeSnafu, - UnsupportedDecompressionSnafu, -}; -use crate::file_format::reader::footer::FooterParser; -use crate::file_format::reader::{AsyncReader, SyncReader}; -use crate::file_format::{MAGIC, MAGIC_SIZE, MIN_FILE_SIZE}; +use crate::error::{ReadSnafu, Result, UnexpectedPuffinFileSizeSnafu}; +use crate::file_format::reader::footer::DEFAULT_PREFETCH_SIZE; +use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader}; +use crate::file_format::MIN_FILE_SIZE; use crate::file_metadata::FileMetadata; use crate::partial_reader::PartialReader; @@ -51,6 +46,11 @@ impl PuffinFileReader { } } + pub fn with_metadata(mut self, metadata: Option) -> Self { + self.metadata = metadata; + self + } + fn validate_file_size(file_size: u64) -> Result<()> { ensure!( file_size >= MIN_FILE_SIZE, @@ -72,45 +72,6 @@ impl PuffinFileReader { } } -impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader { - type Reader = PartialReader<&'a mut R>; - - fn metadata(&mut self) -> Result { - if let Some(metadata) = &self.metadata { - return Ok(metadata.clone()); - } - - // check the magic - let mut magic = [0; MAGIC_SIZE as usize]; - self.source.read_exact(&mut magic).context(ReadSnafu)?; - ensure!(magic == MAGIC, MagicNotMatchedSnafu); - - let file_size = self.get_file_size_sync()?; - - // parse the footer - let metadata = FooterParser::new(&mut self.source, file_size).parse_sync()?; - self.metadata = Some(metadata.clone()); - Ok(metadata) - } - - fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result { - // TODO(zhongzc): support decompression - let 
compression = blob_metadata.compression_codec.as_ref(); - ensure!( - compression.is_none(), - UnsupportedDecompressionSnafu { - decompression: compression.unwrap().to_string() - } - ); - - Ok(PartialReader::new( - &mut self.source, - blob_metadata.offset as _, - blob_metadata.length as _, - )) - } -} - #[async_trait] impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { type Reader = PartialReader<&'a mut R>; @@ -119,17 +80,10 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { if let Some(metadata) = &self.metadata { return Ok(metadata.clone()); } - - // check the magic - let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?; - ensure!(*magic == MAGIC, MagicNotMatchedSnafu); - let file_size = self.get_file_size_async().await?; - - // parse the footer - let metadata = FooterParser::new(&mut self.source, file_size) - .parse_async() - .await?; + let mut reader = PuffinFileFooterReader::new(&mut self.source, file_size) + .with_prefetch_size(DEFAULT_PREFETCH_SIZE); + let metadata = reader.metadata().await?; self.metadata = Some(metadata.clone()); Ok(metadata) } @@ -143,14 +97,6 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { } } -impl PuffinFileReader { - fn get_file_size_sync(&mut self) -> Result { - let file_size = self.source.seek(SeekFrom::End(0)).context(SeekSnafu)?; - Self::validate_file_size(file_size)?; - Ok(file_size) - } -} - impl PuffinFileReader { async fn get_file_size_async(&mut self) -> Result { let file_size = self diff --git a/src/puffin/src/file_format/reader/footer.rs b/src/puffin/src/file_format/reader/footer.rs index aa764fd32a..d0cd1e8ed4 100644 --- a/src/puffin/src/file_format/reader/footer.rs +++ b/src/puffin/src/file_format/reader/footer.rs @@ -12,240 +12,98 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::{self, Cursor, SeekFrom}; +use std::io::Cursor; use common_base::range_read::RangeReader; use snafu::{ensure, ResultExt}; use crate::error::{ - BytesToIntegerSnafu, DeserializeJsonSnafu, InvalidBlobAreaEndSnafu, InvalidBlobOffsetSnafu, - Lz4DecompressionSnafu, MagicNotMatchedSnafu, ParseStageNotMatchSnafu, ReadSnafu, Result, - SeekSnafu, UnexpectedFooterPayloadSizeSnafu, + DeserializeJsonSnafu, InvalidPuffinFooterSnafu, Lz4DecompressionSnafu, MagicNotMatchedSnafu, + ReadSnafu, Result, UnexpectedFooterPayloadSizeSnafu, }; use crate::file_format::{Flags, FLAGS_SIZE, MAGIC, MAGIC_SIZE, MIN_FILE_SIZE, PAYLOAD_SIZE_SIZE}; use crate::file_metadata::FileMetadata; -/// Parser for the footer of a Puffin data file +/// The default prefetch size for the footer reader. +pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB + +/// Reader for the footer of a Puffin data file /// /// The footer has a specific layout that needs to be read and parsed to /// extract metadata about the file, which is encapsulated in the [`FileMetadata`] type. /// +/// This reader supports prefetching, allowing for more efficient reading +/// of the footer by fetching additional data ahead of time. +/// /// ```text /// Footer layout: HeadMagic Payload PayloadSize Flags FootMagic /// [4] [?] 
[4] [4] [4] /// ``` -pub struct FooterParser { - // The underlying IO source +pub struct PuffinFileFooterReader { + /// The source of the puffin file source: R, - - // The size of the file, used for calculating offsets to read from + /// The content length of the puffin file file_size: u64, + /// The prefetch footer size + prefetch_size: Option, } -impl FooterParser { - pub fn new(source: R, file_size: u64) -> Self { - Self { source, file_size } - } -} - -impl FooterParser { - /// Parses the footer from the IO source in a synchronous manner. - pub fn parse_sync(&mut self) -> Result { - let mut parser = StageParser::new(self.file_size); - - let mut buf = vec![]; - while let Some(byte_to_read) = parser.next_to_read() { - self.source - .seek(SeekFrom::Start(byte_to_read.offset)) - .context(SeekSnafu)?; - let size = byte_to_read.size as usize; - - buf.resize(size, 0); - let buf = &mut buf[..size]; - - self.source.read_exact(buf).context(ReadSnafu)?; - - parser.consume_bytes(buf)?; +impl<'a, R: RangeReader + 'a> PuffinFileFooterReader { + pub fn new(source: R, content_len: u64) -> Self { + Self { + source, + file_size: content_len, + prefetch_size: None, } - - parser.finish() } -} -impl FooterParser { - /// Parses the footer from the IO source in a asynchronous manner. - pub async fn parse_async(&mut self) -> Result { - let mut parser = StageParser::new(self.file_size); + fn prefetch_size(&self) -> u64 { + self.prefetch_size.unwrap_or(MIN_FILE_SIZE) + } - let mut buf = vec![]; - while let Some(byte_to_read) = parser.next_to_read() { - buf.clear(); - let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size; - self.source - .read_into(range, &mut buf) + pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self { + self.prefetch_size = Some(prefetch_size.max(MIN_FILE_SIZE)); + self + } + + pub async fn metadata(&'a mut self) -> Result { + // Note: prefetch > content_len is allowed, since we're using saturating_sub. + let footer_start = self.file_size.saturating_sub(self.prefetch_size()); + let suffix = self + .source + .read(footer_start..self.file_size) + .await + .context(ReadSnafu)?; + let suffix_len = suffix.len(); + + // check the magic + let magic = Self::read_tailing_four_bytes(&suffix)?; + ensure!(magic == MAGIC, MagicNotMatchedSnafu); + + let flags = self.decode_flags(&suffix[..suffix_len - MAGIC_SIZE as usize])?; + let length = self.decode_payload_size( + &suffix[..suffix_len - MAGIC_SIZE as usize - FLAGS_SIZE as usize], + )?; + let footer_size = PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE; + + // Did not fetch the entire file metadata in the initial read, need to make a second request. + if length > suffix_len as u64 - footer_size { + let metadata_start = self.file_size - length - footer_size; + let meta = self + .source + .read(metadata_start..self.file_size - footer_size) .await .context(ReadSnafu)?; - parser.consume_bytes(&buf)?; - } - - parser.finish() - } -} - -/// The internal stages of parsing the footer. -/// This enum allows the StageParser to keep track of which part -/// of the footer needs to be parsed next. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ParseStage { - FootMagic, - Flags, - PayloadSize, - Payload, - HeadMagic, - Done, -} - -/// Manages the parsing process of the file's footer. -struct StageParser { - /// Current stage in the parsing sequence of the footer. - stage: ParseStage, - - /// Total file size; used for calculating offsets to read from. - file_size: u64, - - /// Flags from the footer, set when the `Flags` field is parsed. 
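// A minimal sketch of the offset arithmetic the new PuffinFileFooterReader above
// depends on, assuming the documented layout
// `HeadMagic | Payload | PayloadSize(4) | Flags(4) | FootMagic(4)`. The real reader
// validates the decoded payload size against the file size first, so the
// subtraction below cannot underflow there.
fn footer_payload_range(file_size: u64, payload_size: u64) -> std::ops::Range<u64> {
    // PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE in the crate's constants.
    const TRAILER_SIZE: u64 = 4 + 4 + 4;
    let start = file_size - payload_size - TRAILER_SIZE;
    start..(file_size - TRAILER_SIZE)
}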
- flags: Flags, - - /// Size of the footer's payload, set when the `PayloadSize` is parsed. - payload_size: u64, - - /// Metadata from the footer's payload, set when the `Payload` is parsed. - metadata: Option, -} - -/// Represents a read operation that needs to be performed, including the -/// offset from the start of the file and the number of bytes to read. -struct BytesToRead { - offset: u64, - size: u64, -} - -impl StageParser { - fn new(file_size: u64) -> Self { - Self { - stage: ParseStage::FootMagic, - file_size, - payload_size: 0, - flags: Flags::empty(), - metadata: None, + self.parse_payload(&flags, &meta) + } else { + let metadata_start = self.file_size - length - footer_size - footer_start; + let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize]; + self.parse_payload(&flags, meta) } } - /// Determines the next segment of bytes to read based on the current parsing stage. - /// This method returns information like the offset and size of the next read, - /// or None if parsing is complete. - fn next_to_read(&self) -> Option { - if self.stage == ParseStage::Done { - return None; - } - - let btr = match self.stage { - ParseStage::FootMagic => BytesToRead { - offset: self.foot_magic_offset(), - size: MAGIC_SIZE, - }, - ParseStage::Flags => BytesToRead { - offset: self.flags_offset(), - size: FLAGS_SIZE, - }, - ParseStage::PayloadSize => BytesToRead { - offset: self.payload_size_offset(), - size: PAYLOAD_SIZE_SIZE, - }, - ParseStage::Payload => BytesToRead { - offset: self.payload_offset(), - size: self.payload_size, - }, - ParseStage::HeadMagic => BytesToRead { - offset: self.head_magic_offset(), - size: MAGIC_SIZE, - }, - ParseStage::Done => unreachable!(), - }; - - Some(btr) - } - - /// Processes the bytes that have been read according to the current parsing stage - /// and advances the parsing stage. It ensures the correct sequence of bytes is - /// encountered and stores the necessary information in the `StageParser`. - fn consume_bytes(&mut self, bytes: &[u8]) -> Result<()> { - match self.stage { - ParseStage::FootMagic => { - ensure!(bytes == MAGIC, MagicNotMatchedSnafu); - self.stage = ParseStage::Flags; - } - ParseStage::Flags => { - self.flags = Self::parse_flags(bytes)?; - self.stage = ParseStage::PayloadSize; - } - ParseStage::PayloadSize => { - self.payload_size = Self::parse_payload_size(bytes)?; - self.validate_payload_size()?; - self.stage = ParseStage::Payload; - } - ParseStage::Payload => { - self.metadata = Some(self.parse_payload(bytes)?); - self.validate_metadata()?; - self.stage = ParseStage::HeadMagic; - } - ParseStage::HeadMagic => { - ensure!(bytes == MAGIC, MagicNotMatchedSnafu); - self.stage = ParseStage::Done; - } - ParseStage::Done => unreachable!(), - } - - Ok(()) - } - - /// Finalizes the parsing process, ensuring all stages are complete, and returns - /// the parsed `FileMetadata`. It converts the raw footer payload into structured data. 
- fn finish(self) -> Result { - ensure!( - self.stage == ParseStage::Done, - ParseStageNotMatchSnafu { - expected: format!("{:?}", ParseStage::Done), - actual: format!("{:?}", self.stage), - } - ); - - Ok(self.metadata.unwrap()) - } - - fn parse_flags(bytes: &[u8]) -> Result { - let n = u32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?); - Ok(Flags::from_bits_truncate(n)) - } - - fn parse_payload_size(bytes: &[u8]) -> Result { - let n = i32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?); - ensure!(n >= 0, UnexpectedFooterPayloadSizeSnafu { size: n }); - Ok(n as u64) - } - - fn validate_payload_size(&self) -> Result<()> { - ensure!( - self.payload_size <= self.file_size - MIN_FILE_SIZE, - UnexpectedFooterPayloadSizeSnafu { - size: self.payload_size as i32 - } - ); - Ok(()) - } - - fn parse_payload(&self, bytes: &[u8]) -> Result { - if self.flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) { + fn parse_payload(&self, flags: &Flags, bytes: &[u8]) -> Result { + if flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) { let decoder = lz4_flex::frame::FrameDecoder::new(Cursor::new(bytes)); let res = serde_json::from_reader(decoder).context(Lz4DecompressionSnafu)?; Ok(res) @@ -254,54 +112,35 @@ impl StageParser { } } - fn validate_metadata(&self) -> Result<()> { - let metadata = self.metadata.as_ref().expect("metadata is not set"); + fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> { + let suffix_len = suffix.len(); + ensure!(suffix_len >= 4, InvalidPuffinFooterSnafu); + let mut bytes = [0; 4]; + bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]); - let mut next_blob_offset = MAGIC_SIZE; - // check blob offsets - for blob in &metadata.blobs { - ensure!( - blob.offset as u64 == next_blob_offset, - InvalidBlobOffsetSnafu { - offset: blob.offset - } - ); - next_blob_offset += blob.length as u64; - } + Ok(bytes) + } + + fn decode_flags(&self, suffix: &[u8]) -> Result { + let flags = u32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?); + Ok(Flags::from_bits_truncate(flags)) + } + + fn decode_payload_size(&self, suffix: &[u8]) -> Result { + let payload_size = i32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?); - let blob_area_end = metadata - .blobs - .last() - .map_or(MAGIC_SIZE, |b| (b.offset + b.length) as u64); ensure!( - blob_area_end == self.head_magic_offset(), - InvalidBlobAreaEndSnafu { - offset: blob_area_end + payload_size >= 0, + UnexpectedFooterPayloadSizeSnafu { size: payload_size } + ); + let payload_size = payload_size as u64; + ensure!( + payload_size <= self.file_size - MIN_FILE_SIZE, + UnexpectedFooterPayloadSizeSnafu { + size: self.file_size as i32 } ); - Ok(()) - } - - fn foot_magic_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - } - - fn flags_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - FLAGS_SIZE - } - - fn payload_size_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - } - - fn payload_offset(&self) -> u64 { - // `validate_payload_size` ensures that this subtraction will not overflow - self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size - } - - fn head_magic_offset(&self) -> u64 { - // `validate_payload_size` ensures that this subtraction will not overflow - self.file_size - MAGIC_SIZE * 2 - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size + Ok(payload_size) } } diff --git a/src/puffin/src/file_metadata.rs b/src/puffin/src/file_metadata.rs index 74eea3aa08..4804c65be4 100644 --- a/src/puffin/src/file_metadata.rs +++ 
b/src/puffin/src/file_metadata.rs @@ -33,6 +33,22 @@ pub struct FileMetadata { pub properties: HashMap, } +impl FileMetadata { + /// Calculates the memory usage of the file metadata in bytes. + pub fn memory_usage(&self) -> usize { + self.blobs + .iter() + .map(|blob| blob.memory_usage()) + .sum::() + + self + .properties + .iter() + .map(|(k, v)| k.len() + v.len()) + .sum::() + + std::mem::size_of::() + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/puffin/src/partial_reader/async.rs b/src/puffin/src/partial_reader/async.rs index 3de40cb3a1..4eedd1ee31 100644 --- a/src/puffin/src/partial_reader/async.rs +++ b/src/puffin/src/partial_reader/async.rs @@ -23,6 +23,10 @@ use crate::partial_reader::PartialReader; #[async_trait] impl RangeReader for PartialReader { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.size, diff --git a/src/puffin/src/puffin_manager.rs b/src/puffin/src/puffin_manager.rs index 7bd5e9039d..204bc2c66e 100644 --- a/src/puffin/src/puffin_manager.rs +++ b/src/puffin/src/puffin_manager.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod cache; pub mod file_accessor; pub mod fs_puffin_manager; pub mod stager; @@ -72,11 +73,12 @@ pub struct PutOptions { /// The `PuffinReader` trait provides methods for reading blobs and directories from a Puffin file. #[async_trait] -#[auto_impl::auto_impl(Arc)] pub trait PuffinReader { type Blob: BlobGuard; type Dir: DirGuard; + fn with_file_size_hint(self, file_size_hint: Option) -> Self; + /// Reads a blob from the Puffin file. /// /// The returned `BlobGuard` is used to access the blob data. diff --git a/src/puffin/src/puffin_manager/cache.rs b/src/puffin/src/puffin_manager/cache.rs new file mode 100644 index 0000000000..66fcb36bf9 --- /dev/null +++ b/src/puffin/src/puffin_manager/cache.rs @@ -0,0 +1,60 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use prometheus::IntGaugeVec; + +use crate::file_metadata::FileMetadata; +/// Metrics for index metadata. +const PUFFIN_METADATA_TYPE: &str = "puffin_metadata"; + +pub type PuffinMetadataCacheRef = Arc; + +/// A cache for storing the metadata of the index files. 
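// A hedged construction sketch for the cache declared just below; the
// `index_cache_bytes` argument is an assumed placeholder for a
// `prometheus::IntGaugeVec` owned by the caller, which the eviction listener
// decrements when entries are removed.
fn metadata_cache_sketch(index_cache_bytes: &'static prometheus::IntGaugeVec) -> PuffinMetadataCacheRef {
    // 64 MiB capacity; entries are weighed by key length plus FileMetadata::memory_usage().
    std::sync::Arc::new(PuffinMetadataCache::new(64 * 1024 * 1024, index_cache_bytes))
}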
+pub struct PuffinMetadataCache { + cache: moka::sync::Cache>, +} + +fn puffin_metadata_weight(k: &String, v: &Arc) -> u32 { + (k.as_bytes().len() + v.memory_usage()) as u32 +} + +impl PuffinMetadataCache { + pub fn new(capacity: u64, cache_bytes: &'static IntGaugeVec) -> Self { + common_telemetry::debug!("Building PuffinMetadataCache with capacity: {capacity}"); + Self { + cache: moka::sync::CacheBuilder::new(capacity) + .name("puffin_metadata") + .weigher(puffin_metadata_weight) + .eviction_listener(|k, v, _cause| { + let size = puffin_metadata_weight(&k, &v); + cache_bytes + .with_label_values(&[PUFFIN_METADATA_TYPE]) + .sub(size.into()); + }) + .build(), + } + } + + /// Gets the metadata from the cache. + pub fn get_metadata(&self, file_id: &str) -> Option> { + self.cache.get(file_id) + } + + /// Puts the metadata into the cache. + pub fn put_metadata(&self, file_id: String, metadata: Arc) { + self.cache.insert(file_id, metadata); + } +} diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager.rs b/src/puffin/src/puffin_manager/fs_puffin_manager.rs index 976eb23997..52190f92fb 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager.rs @@ -21,6 +21,7 @@ pub use reader::FsPuffinReader; pub use writer::FsPuffinWriter; use crate::error::Result; +use crate::puffin_manager::cache::PuffinMetadataCacheRef; use crate::puffin_manager::file_accessor::PuffinFileAccessor; use crate::puffin_manager::stager::Stager; use crate::puffin_manager::PuffinManager; @@ -31,16 +32,29 @@ pub struct FsPuffinManager { stager: S, /// The puffin file accessor. puffin_file_accessor: F, + /// The puffin metadata cache. + puffin_metadata_cache: Option, } impl FsPuffinManager { - /// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`. + /// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`, + /// and optionally with a `puffin_metadata_cache`. pub fn new(stager: S, puffin_file_accessor: F) -> Self { Self { stager, puffin_file_accessor, + puffin_metadata_cache: None, } } + + /// Sets the puffin metadata cache. 
+ pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } } #[async_trait] @@ -57,6 +71,7 @@ where puffin_file_name.to_string(), self.stager.clone(), self.puffin_file_accessor.clone(), + self.puffin_metadata_cache.clone(), )) } diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs index 3de27fdb77..a5da2f75f8 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs @@ -14,6 +14,7 @@ use std::io; use std::ops::Range; +use std::sync::Arc; use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; @@ -23,12 +24,14 @@ use futures::io::BufReader; use futures::{AsyncRead, AsyncWrite}; use snafu::{ensure, OptionExt, ResultExt}; +use super::PuffinMetadataCacheRef; use crate::blob_metadata::{BlobMetadata, CompressionCodec}; use crate::error::{ BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu, MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu, }; use crate::file_format::reader::{AsyncReader, PuffinFileReader}; +use crate::file_metadata::FileMetadata; use crate::partial_reader::PartialReader; use crate::puffin_manager::file_accessor::PuffinFileAccessor; use crate::puffin_manager::fs_puffin_manager::dir_meta::DirMetadata; @@ -40,19 +43,32 @@ pub struct FsPuffinReader { /// The name of the puffin file. puffin_file_name: String, + /// The file size hint. + file_size_hint: Option, + /// The stager. stager: S, /// The puffin file accessor. puffin_file_accessor: F, + + /// The puffin file metadata cache. + puffin_file_metadata_cache: Option, } impl FsPuffinReader { - pub(crate) fn new(puffin_file_name: String, stager: S, puffin_file_accessor: F) -> Self { + pub(crate) fn new( + puffin_file_name: String, + stager: S, + puffin_file_accessor: F, + puffin_file_metadata_cache: Option, + ) -> Self { Self { puffin_file_name, + file_size_hint: None, stager, puffin_file_accessor, + puffin_file_metadata_cache, } } } @@ -66,20 +82,28 @@ where type Blob = Either, S::Blob>; type Dir = S::Dir; + fn with_file_size_hint(mut self, file_size_hint: Option) -> Self { + self.file_size_hint = file_size_hint; + self + } + async fn blob(&self, key: &str) -> Result { - let reader = self + let mut reader = self .puffin_file_accessor .reader(&self.puffin_file_name) .await?; + if let Some(file_size_hint) = self.file_size_hint { + reader.with_file_size_hint(file_size_hint); + } let mut file = PuffinFileReader::new(reader); - // TODO(zhongzc): cache the metadata. - let metadata = file.metadata().await?; + let metadata = self.get_puffin_file_metadata(&mut file).await?; let blob_metadata = metadata .blobs - .into_iter() + .iter() .find(|m| m.blob_type == key) - .context(BlobNotFoundSnafu { blob: key })?; + .context(BlobNotFoundSnafu { blob: key })? + .clone(); let blob = if blob_metadata.compression_codec.is_none() { // If the blob is not compressed, we can directly read it from the puffin file. 
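// -----------------------------------------------------------------------------------
// Aside (an illustrative sketch, not part of the patch): how the metadata-cache pieces
// above fit together. `PuffinMetadataCache` is byte-weighted: every entry is charged
// `key.len() + FileMetadata::memory_usage()`, so the `capacity` passed to `new` is a
// byte budget rather than an entry count, and the eviction listener returns evicted
// bytes to the `cache_bytes` gauge. `StubMetadata` stands in for `FileMetadata`, the
// gauge name is made up, and since no gauge increment appears in this hunk, the
// caller accounts for inserted bytes here.
use std::sync::Arc;

use prometheus::{register_int_gauge_vec, IntGaugeVec};

struct StubMetadata {
    blob_count: usize,
}

impl StubMetadata {
    fn memory_usage(&self) -> usize {
        self.blob_count * 64 + std::mem::size_of::<Self>()
    }
}

fn main() {
    let cache_bytes: IntGaugeVec =
        register_int_gauge_vec!("index_metadata_bytes", "cached bytes per type", &["type"])
            .unwrap();
    let listener_gauge = cache_bytes.clone();

    // Byte-weighted cache, mirroring what `PuffinMetadataCache::new` builds.
    let cache: moka::sync::Cache<String, Arc<StubMetadata>> =
        moka::sync::CacheBuilder::new(64 * 1024) // 64 KiB budget
            .weigher(|k: &String, v: &Arc<StubMetadata>| (k.len() + v.memory_usage()) as u32)
            .eviction_listener(move |k: Arc<String>, v: Arc<StubMetadata>, _cause| {
                // What the patch's listener does: subtract the evicted entry's weight.
                let size = (k.len() + v.memory_usage()) as i64;
                listener_gauge
                    .with_label_values(&["puffin_metadata"])
                    .sub(size);
            })
            .build();

    let key = "puffin-file-0".to_string();
    let meta = Arc::new(StubMetadata { blob_count: 3 });
    cache_bytes
        .with_label_values(&["puffin_metadata"])
        .add((key.len() + meta.memory_usage()) as i64);
    cache.insert(key.clone(), meta);
    assert!(cache.get(&key).is_some());
}
// -----------------------------------------------------------------------------------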
@@ -133,6 +157,23 @@ where S: Stager, F: PuffinFileAccessor + Clone, { + async fn get_puffin_file_metadata( + &self, + reader: &mut PuffinFileReader, + ) -> Result> { + if let Some(cache) = self.puffin_file_metadata_cache.as_ref() { + if let Some(metadata) = cache.get_metadata(&self.puffin_file_name) { + return Ok(metadata); + } + } + + let metadata = Arc::new(reader.metadata().await?); + if let Some(cache) = self.puffin_file_metadata_cache.as_ref() { + cache.put_metadata(self.puffin_file_name.to_string(), metadata.clone()); + } + Ok(metadata) + } + async fn init_blob_to_stager( reader: PuffinFileReader, blob_metadata: BlobMetadata, @@ -274,6 +315,13 @@ where A: RangeReader, B: RangeReader, { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + match self { + Either::L(a) => a.with_file_size_hint(file_size_hint), + Either::R(b) => b.with_file_size_hint(file_size_hint), + } + } + async fn metadata(&mut self) -> io::Result { match self { Either::L(a) => a.metadata().await, diff --git a/src/puffin/src/tests.rs b/src/puffin/src/tests.rs index a152d4124b..a3bb485879 100644 --- a/src/puffin/src/tests.rs +++ b/src/puffin/src/tests.rs @@ -13,26 +13,14 @@ // limitations under the License. use std::collections::HashMap; -use std::fs::File; -use std::io::{Cursor, Read}; use std::vec; use common_base::range_read::{FileReader, RangeReader}; use futures::io::Cursor as AsyncCursor; -use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader}; -use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter}; - -#[test] -fn test_read_empty_puffin_sync() { - let path = "src/tests/resources/empty-puffin-uncompressed.puffin"; - - let file = File::open(path).unwrap(); - let mut reader = PuffinFileReader::new(file); - let metadata = reader.metadata().unwrap(); - assert_eq!(metadata.properties.len(), 0); - assert_eq!(metadata.blobs.len(), 0); -} +use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader, PuffinFileReader}; +use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter}; +use crate::file_metadata::FileMetadata; #[tokio::test] async fn test_read_empty_puffin_async() { @@ -45,39 +33,37 @@ async fn test_read_empty_puffin_async() { assert_eq!(metadata.blobs.len(), 0); } -#[test] -fn test_sample_metric_data_puffin_sync() { - let path = "src/tests/resources/sample-metric-data-uncompressed.puffin"; +async fn test_read_puffin_file_metadata( + path: &str, + file_size: u64, + expeccted_metadata: FileMetadata, +) { + for prefetch_size in [0, file_size / 2, file_size, file_size + 10] { + let reader = FileReader::new(path).await.unwrap(); + let mut footer_reader = PuffinFileFooterReader::new(reader, file_size); + if prefetch_size > 0 { + footer_reader = footer_reader.with_prefetch_size(prefetch_size); + } + let metadata = footer_reader.metadata().await.unwrap(); + assert_eq!(metadata.properties, expeccted_metadata.properties,); + assert_eq!(metadata.blobs, expeccted_metadata.blobs); + } +} - let file = File::open(path).unwrap(); - let mut reader = PuffinFileReader::new(file); - let metadata = reader.metadata().unwrap(); +#[tokio::test] +async fn test_read_puffin_file_metadata_async() { + let paths = vec![ + "src/tests/resources/empty-puffin-uncompressed.puffin", + "src/tests/resources/sample-metric-data-uncompressed.puffin", + ]; + for path in paths { + let mut reader = FileReader::new(path).await.unwrap(); + let file_size = reader.metadata().await.unwrap().content_length; + let mut reader = PuffinFileReader::new(reader); + let metadata = 
reader.metadata().await.unwrap(); - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 2); - assert_eq!(metadata.blobs[0].blob_type, "some-blob"); - assert_eq!(metadata.blobs[0].offset, 4); - assert_eq!(metadata.blobs[0].length, 9); - - assert_eq!(metadata.blobs[1].blob_type, "some-other-blob"); - assert_eq!(metadata.blobs[1].offset, 13); - assert_eq!(metadata.blobs[1].length, 83); - - let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap(); - let mut buf = String::new(); - some_blob.read_to_string(&mut buf).unwrap(); - assert_eq!(buf, "abcdefghi"); - - let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap(); - let mut buf = Vec::new(); - some_other_blob.read_to_end(&mut buf).unwrap(); - let expected = include_bytes!("tests/resources/sample-metric-data.blob"); - assert_eq!(buf, expected); + test_read_puffin_file_metadata(path, file_size, metadata).await; + } } #[tokio::test] @@ -113,38 +99,6 @@ async fn test_sample_metric_data_puffin_async() { assert_eq!(buf, expected); } -#[test] -fn test_writer_reader_with_empty_sync() { - fn test_writer_reader_with_empty_sync(footer_compressed: bool) { - let mut buf = Cursor::new(vec![]); - - let mut writer = PuffinFileWriter::new(&mut buf); - writer.set_properties(HashMap::from([( - "created-by".to_string(), - "Test 1234".to_string(), - )])); - - writer.set_footer_lz4_compressed(footer_compressed); - let written_bytes = writer.finish().unwrap(); - assert!(written_bytes > 0); - - let mut buf = Cursor::new(buf.into_inner()); - let mut reader = PuffinFileReader::new(&mut buf); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 0); - } - - test_writer_reader_with_empty_sync(false); - test_writer_reader_with_empty_sync(true); -} - #[tokio::test] async fn test_writer_reader_empty_async() { async fn test_writer_reader_empty_async(footer_compressed: bool) { @@ -176,76 +130,6 @@ async fn test_writer_reader_empty_async() { test_writer_reader_empty_async(true).await; } -#[test] -fn test_writer_reader_sync() { - fn test_writer_reader_sync(footer_compressed: bool) { - let mut buf = Cursor::new(vec![]); - - let mut writer = PuffinFileWriter::new(&mut buf); - - let blob1 = "abcdefghi"; - writer - .add_blob(Blob { - compressed_data: Cursor::new(&blob1), - blob_type: "some-blob".to_string(), - properties: Default::default(), - compression_codec: None, - }) - .unwrap(); - - let blob2 = include_bytes!("tests/resources/sample-metric-data.blob"); - writer - .add_blob(Blob { - compressed_data: Cursor::new(&blob2), - blob_type: "some-other-blob".to_string(), - properties: Default::default(), - compression_codec: None, - }) - .unwrap(); - - writer.set_properties(HashMap::from([( - "created-by".to_string(), - "Test 1234".to_string(), - )])); - - writer.set_footer_lz4_compressed(footer_compressed); - let written_bytes = writer.finish().unwrap(); - assert!(written_bytes > 0); - - let mut buf = Cursor::new(buf.into_inner()); - let mut reader = PuffinFileReader::new(&mut buf); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 2); - assert_eq!(metadata.blobs[0].blob_type, "some-blob"); - 
assert_eq!(metadata.blobs[0].offset, 4); - assert_eq!(metadata.blobs[0].length, 9); - - assert_eq!(metadata.blobs[1].blob_type, "some-other-blob"); - assert_eq!(metadata.blobs[1].offset, 13); - assert_eq!(metadata.blobs[1].length, 83); - - let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap(); - let mut buf = String::new(); - some_blob.read_to_string(&mut buf).unwrap(); - assert_eq!(buf, blob1); - - let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap(); - let mut buf = Vec::new(); - some_other_blob.read_to_end(&mut buf).unwrap(); - assert_eq!(buf, blob2); - } - - test_writer_reader_sync(false); - test_writer_reader_sync(true); -} - #[tokio::test] async fn test_writer_reader_async() { async fn test_writer_reader_async(footer_compressed: bool) { diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 863a5a1c33..8139ea3aaf 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -67,13 +67,11 @@ tokio.workspace = true uuid.workspace = true [dev-dependencies] -approx_eq = "0.1" arrow.workspace = true catalog = { workspace = true, features = ["testing"] } common-macro.workspace = true common-query = { workspace = true, features = ["testing"] } fastrand = "2.0" -format_num = "0.1" num = "0.4" num-traits = "0.2" paste = "1.0" @@ -83,8 +81,6 @@ serde.workspace = true serde_json.workspace = true session = { workspace = true, features = ["testing"] } statrs = "0.16" -stats-cli = "3.0" store-api.workspace = true -streaming-stats = "0.2" table = { workspace = true, features = ["testing"] } tokio-stream.workspace = true diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index 8166400b8f..45378e532c 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -146,6 +146,7 @@ impl Categorizer { | Expr::Between(_) | Expr::Sort(_) | Expr::Exists(_) + | Expr::InList(_) | Expr::ScalarFunction(_) => Commutativity::Commutative, Expr::Like(_) @@ -157,7 +158,6 @@ impl Categorizer { | Expr::TryCast(_) | Expr::AggregateFunction(_) | Expr::WindowFunction(_) - | Expr::InList(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::Wildcard { .. } => Commutativity::Unimplemented, diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 7e246d11c3..e696008cf5 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -316,6 +316,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to get SKIPPING index options"))] + GetSkippingIndexOptions { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -366,7 +373,9 @@ impl ErrorExt for Error { MissingTableMutationHandler { .. } => StatusCode::Unexpected, GetRegionMetadata { .. } => StatusCode::RegionNotReady, TableReadOnly { .. } => StatusCode::Unsupported, - GetFulltextOptions { source, .. } => source.status_code(), + GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. 
} => { + source.status_code() + } } } diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 001e41ca99..bfdfb5981a 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -689,6 +689,13 @@ impl PromPlanner { let mut matches = label_matchers.find_matchers(METRIC_NAME); ensure!(!matches.is_empty(), NoMetricMatcherSnafu); ensure!(matches.len() == 1, MultipleMetricMatchersSnafu); + ensure!( + matches[0].op == MatchOp::Equal, + UnsupportedMatcherOpSnafu { + matcher_op: matches[0].op.to_string(), + matcher: METRIC_NAME + } + ); metric_name = matches.pop().map(|m| m.value); } @@ -1213,7 +1220,7 @@ impl PromPlanner { let quantile_expr = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Float64(Some(quantile)))) => quantile, other => UnexpectedPlanExprSnafu { - desc: format!("expect f64 literal as quantile, but found {:?}", other), + desc: format!("expected f64 literal as quantile, but found {:?}", other), } .fail()?, }; @@ -1224,7 +1231,7 @@ impl PromPlanner { Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t as i64, Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t, other => UnexpectedPlanExprSnafu { - desc: format!("expect i64 literal as t, but found {:?}", other), + desc: format!("expected i64 literal as t, but found {:?}", other), } .fail()?, }; @@ -1235,7 +1242,7 @@ impl PromPlanner { Some(DfExpr::Literal(ScalarValue::Float64(Some(sf)))) => sf, other => UnexpectedPlanExprSnafu { desc: format!( - "expect f64 literal as smoothing factor, but found {:?}", + "expected f64 literal as smoothing factor, but found {:?}", other ), } @@ -1244,7 +1251,10 @@ impl PromPlanner { let tf_exp = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Float64(Some(tf)))) => tf, other => UnexpectedPlanExprSnafu { - desc: format!("expect f64 literal as trend factor, but found {:?}", other), + desc: format!( + "expected f64 literal as trend factor, but found {:?}", + other + ), } .fail()?, }; @@ -1331,6 +1341,47 @@ impl PromPlanner { exprs.push(date_part_expr); ScalarFunc::GeneratedExpr } + + "label_join" => { + let (concat_expr, dst_label) = + Self::build_concat_labels_expr(&mut other_input_exprs, session_state)?; + + // Reserve the current field columns except the `dst_label`. + for value in &self.ctx.field_columns { + if *value != dst_label { + let expr = DfExpr::Column(Column::from_name(value)); + exprs.push(expr); + } + } + + // Remove it from tag columns + self.ctx.tag_columns.retain(|tag| *tag != dst_label); + + // Add the new label expr + exprs.push(concat_expr); + + ScalarFunc::GeneratedExpr + } + "label_replace" => { + let (replace_expr, dst_label) = + Self::build_regexp_replace_label_expr(&mut other_input_exprs, session_state)?; + + // Reserve the current field columns except the `dst_label`. 
+ for value in &self.ctx.field_columns { + if *value != dst_label { + let expr = DfExpr::Column(Column::from_name(value)); + exprs.push(expr); + } + } + + // Remove it from tag columns + self.ctx.tag_columns.retain(|tag| *tag != dst_label); + + // Add the new label expr + exprs.push(replace_expr); + + ScalarFunc::GeneratedExpr + } _ => { if let Some(f) = session_state.scalar_functions().get(func.name) { ScalarFunc::DataFusionBuiltin(f.clone()) @@ -1411,6 +1462,7 @@ impl PromPlanner { // update value columns' name, and alias them to remove qualifiers let mut new_field_columns = Vec::with_capacity(exprs.len()); + exprs = exprs .into_iter() .map(|expr| { @@ -1420,11 +1472,146 @@ impl PromPlanner { }) .collect::, _>>() .context(DataFusionPlanningSnafu)?; + self.ctx.field_columns = new_field_columns; Ok(exprs) } + /// Build expr for `label_replace` function + fn build_regexp_replace_label_expr( + other_input_exprs: &mut VecDeque, + session_state: &SessionState, + ) -> Result<(DfExpr, String)> { + // label_replace(vector, dst_label, replacement, src_label, regex) + let dst_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected dst_label string literal, but found {:?}", other), + } + .fail()?, + }; + let replacement = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, + other => UnexpectedPlanExprSnafu { + desc: format!("expected replacement string literal, but found {:?}", other), + } + .fail()?, + }; + let src_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(s)))) => s, + other => UnexpectedPlanExprSnafu { + desc: format!("expected src_label string literal, but found {:?}", other), + } + .fail()?, + }; + let regex = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, + other => UnexpectedPlanExprSnafu { + desc: format!("expected regex string literal, but found {:?}", other), + } + .fail()?, + }; + + let func = session_state + .scalar_functions() + .get("regexp_replace") + .context(UnsupportedExprSnafu { + name: "regexp_replace", + })?; + + // regexp_replace(src_label, regex, replacement) + let args = vec![ + if src_label.is_empty() { + DfExpr::Literal(ScalarValue::Null) + } else { + DfExpr::Column(Column::from_name(src_label)) + }, + DfExpr::Literal(ScalarValue::Utf8(Some(regex))), + DfExpr::Literal(ScalarValue::Utf8(Some(replacement))), + ]; + + Ok(( + DfExpr::ScalarFunction(ScalarFunction { + func: func.clone(), + args, + }) + .alias(&dst_label), + dst_label, + )) + } + + /// Build expr for `label_join` function + fn build_concat_labels_expr( + other_input_exprs: &mut VecDeque, + session_state: &SessionState, + ) -> Result<(DfExpr, String)> { + // label_join(vector, dst_label, separator, src_label_1, src_label_2, ...) 
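// Aside (illustrative, not part of the patch): what the two builders above and below
// compute for concrete label values. `label_replace` is planned as
// `regexp_replace(src_label, regex, replacement) AS dst_label`, and `label_join` as
// `concat_ws(separator, src_label_1, ...) AS dst_label`. Plain-Rust equivalents of
// those expressions, using the `regex` crate and made-up label values:
fn main() {
    // label_replace(up{...}, "foo", "$1", "tag_0", "(.*):.*") with tag_0 = "a:c"
    // writes everything before the last ':' into the new `foo` label.
    let re = regex::Regex::new("(.*):.*").unwrap();
    assert_eq!(re.replace("a:c", "$1"), "a");

    // label_join(up{...}, "foo", ",", "tag_1", "tag_2", "tag_3") joins the source
    // label values with the separator into the new `foo` label.
    assert_eq!(["v1", "v2", "v3"].join(","), "v1,v2,v3");
}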
+ + let dst_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected dst_label string literal, but found {:?}", other), + } + .fail()?, + }; + let separator = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected separator string literal, but found {:?}", other), + } + .fail()?, + }; + let src_labels = other_input_exprs + .clone() + .into_iter() + .map(|expr| { + // Cast source label into column + match expr { + DfExpr::Literal(ScalarValue::Utf8(Some(label))) => { + if label.is_empty() { + Ok(DfExpr::Literal(ScalarValue::Null)) + } else { + Ok(DfExpr::Column(Column::from_name(label))) + } + } + other => UnexpectedPlanExprSnafu { + desc: format!( + "expected source label string literal, but found {:?}", + other + ), + } + .fail(), + } + }) + .collect::>>()?; + ensure!( + !src_labels.is_empty(), + FunctionInvalidArgumentSnafu { + fn_name: "label_join", + } + ); + + let func = session_state + .scalar_functions() + .get("concat_ws") + .context(UnsupportedExprSnafu { name: "concat_ws" })?; + + // concat_ws(separator, src_label_1, src_label_2, ...) as dst_label + let mut args = Vec::with_capacity(1 + src_labels.len()); + args.push(DfExpr::Literal(ScalarValue::Utf8(Some(separator)))); + args.extend(src_labels); + + Ok(( + DfExpr::ScalarFunction(ScalarFunction { + func: func.clone(), + args, + }) + .alias(&dst_label), + dst_label, + )) + } + fn create_time_index_column_expr(&self) -> Result { Ok(DfExpr::Column(Column::from_name( self.ctx @@ -3267,4 +3454,74 @@ mod test { \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]" ); } + + #[tokio::test] + async fn test_label_join() { + let prom_expr = parser::parse( + "label_join(up{tag_0='api-server'}, 'foo', ',', 'tag_1', 'tag_2', 'tag_3')", + ) + .unwrap(); + let eval_stmt = EvalStmt { + expr: prom_expr, + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + + let table_provider = + build_test_table_provider(&[(DEFAULT_SCHEMA_NAME.to_string(), "up".to_string())], 4, 1) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + + let expected = r#"Filter: field_0 IS NOT NULL AND foo IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8] + Projection: up.timestamp, up.field_0 AS field_0, concat_ws(Utf8(","), up.tag_1, up.tag_2, up.tag_3) AS foo AS foo, up.tag_0, up.tag_1, up.tag_2, up.tag_3 [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesDivide: tags=["tag_0", "tag_1", "tag_2", "tag_3"] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Sort: up.tag_0 DESC NULLS LAST, up.tag_1 DESC NULLS LAST, up.tag_2 DESC 
NULLS LAST, up.tag_3 DESC NULLS LAST, up.timestamp DESC NULLS LAST [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Filter: up.tag_0 = Utf8("api-server") AND up.timestamp >= TimestampMillisecond(-1000, None) AND up.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + TableScan: up [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"#; + + assert_eq!(plan.display_indent_schema().to_string(), expected); + } + + #[tokio::test] + async fn test_label_replace() { + let prom_expr = parser::parse( + "label_replace(up{tag_0=\"a:c\"}, \"foo\", \"$1\", \"tag_0\", \"(.*):.*\")", + ) + .unwrap(); + let eval_stmt = EvalStmt { + expr: prom_expr, + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + + let table_provider = + build_test_table_provider(&[(DEFAULT_SCHEMA_NAME.to_string(), "up".to_string())], 1, 1) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + + let expected = r#"Filter: field_0 IS NOT NULL AND foo IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8] + Projection: up.timestamp, up.field_0 AS field_0, regexp_replace(up.tag_0, Utf8("(.*):.*"), Utf8("$1")) AS foo AS foo, up.tag_0 [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesDivide: tags=["tag_0"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Sort: up.tag_0 DESC NULLS LAST, up.timestamp DESC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Filter: up.tag_0 = Utf8("a:c") AND up.timestamp >= TimestampMillisecond(-1000, None) AND up.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + TableScan: up [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"#; + + assert_eq!(plan.display_indent_schema().to_string(), expected); + } } diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 062bd8e14e..7525bb904b 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -40,11 +40,12 @@ use common_recordbatch::RecordBatches; use common_time::timezone::get_timezone; use common_time::Timestamp; use datafusion::common::ScalarValue; -use datafusion::prelude::SessionContext; +use datafusion::prelude::{concat_ws, SessionContext}; use datafusion_expr::{case, col, lit, Expr}; use datatypes::prelude::*; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema, Schema}; use datatypes::vectors::StringVector; +use itertools::Itertools; use object_store::ObjectStore; use once_cell::sync::Lazy; use regex::Regex; @@ -61,6 +62,7 @@ use sql::statements::show::{ use sql::statements::statement::Statement; use sql::statements::OptionMap; use sqlparser::ast::ObjectName; +use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use 
table::requests::{FILE_TABLE_LOCATION_KEY, FILE_TABLE_PATTERN_KEY}; use table::TableRef; @@ -400,6 +402,20 @@ pub async fn show_index( query_ctx.current_schema() }; + let fulltext_index_expr = case(col("constraint_name").like(lit("%FULLTEXT INDEX%"))) + .when(lit(true), lit("greptime-fulltext-index-v1")) + .otherwise(null()) + .context(error::PlanSqlSnafu)?; + + let inverted_index_expr = case( + col("constraint_name") + .like(lit("%INVERTED INDEX%")) + .or(col("constraint_name").like(lit("%PRIMARY%"))), + ) + .when(lit(true), lit("greptime-inverted-index-v1")) + .otherwise(null()) + .context(error::PlanSqlSnafu)?; + let select = vec![ // 1 as `Non_unique`: contain duplicates lit(1).alias(INDEX_NONT_UNIQUE_COLUMN), @@ -417,8 +433,11 @@ pub async fn show_index( .otherwise(lit(YES_STR)) .context(error::PlanSqlSnafu)? .alias(COLUMN_NULLABLE_COLUMN), - // TODO(dennis): maybe 'BTREE'? - lit("greptime-inverted-index-v1").alias(INDEX_INDEX_TYPE_COLUMN), + concat_ws( + lit(", "), + vec![inverted_index_expr.clone(), fulltext_index_expr.clone()], + ) + .alias(INDEX_INDEX_TYPE_COLUMN), lit("").alias(COLUMN_COMMENT_COLUMN), lit("").alias(INDEX_COMMENT_COLUMN), lit(YES_STR).alias(INDEX_VISIBLE_COLUMN), @@ -746,6 +765,52 @@ pub fn show_create_table( Ok(Output::new_with_record_batches(records)) } +pub fn show_create_foreign_table_for_pg( + table: TableRef, + _query_ctx: QueryContextRef, +) -> Result { + let table_info = table.table_info(); + + let table_meta = &table_info.meta; + let table_name = &table_info.name; + let schema = &table_info.meta.schema; + let is_metric_engine = is_metric_engine(&table_meta.engine); + + let columns = schema + .column_schemas() + .iter() + .filter_map(|c| { + if is_metric_engine && is_metric_engine_internal_column(&c.name) { + None + } else { + Some(format!( + "\"{}\" {}", + c.name, + c.data_type.postgres_datatype_name() + )) + } + }) + .join(",\n "); + + let sql = format!( + r#"CREATE FOREIGN TABLE ft_{} ( + {} +) +SERVER greptimedb +OPTIONS (table_name '{}')"#, + table_name, columns, table_name + ); + + let columns = vec![ + Arc::new(StringVector::from(vec![table_name.clone()])) as _, + Arc::new(StringVector::from(vec![sql])) as _, + ]; + let records = RecordBatches::try_from_columns(SHOW_CREATE_TABLE_OUTPUT_SCHEMA.clone(), columns) + .context(error::CreateRecordBatchSnafu)?; + + Ok(Output::new_with_record_batches(records)) +} + pub fn show_create_view( view_name: ObjectName, definition: &str, diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index ca69dfc5e6..b903509d22 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -19,7 +19,8 @@ use std::collections::HashMap; use common_meta::SchemaOptions; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, }; use snafu::ResultExt; use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; @@ -32,7 +33,8 @@ use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; use crate::error::{ - ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu, + ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, + GetSkippingIndexOptionsSnafu, Result, SqlSnafu, }; /// Generates 
CREATE TABLE options from given table metadata and schema-level options. @@ -115,6 +117,23 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result, - ln_offset: usize, - filename: &str, - eval_ctx: &EvalContext, -) -> StdResult { - let res = exec_coprocessor(script, rb, eval_ctx); - res.map_err(|e| { - crate::python::error::pretty_print_error_in_src(script, &e, ln_offset, filename) - }) -} - #[cfg(test)] mod tests { use crate::python::ffi_types::copr::parse::parse_and_compile_copr; diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index ddfeaf27bd..a90fb880e2 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -134,7 +134,6 @@ table.workspace = true tempfile = "3.0.0" tokio-postgres = "0.7" tokio-postgres-rustls = "0.12" -tokio-test = "0.4" [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.13", features = ["criterion", "flamegraph"] } diff --git a/src/servers/dashboard/VERSION b/src/servers/dashboard/VERSION index 63f2359f64..3d105a6fd8 100644 --- a/src/servers/dashboard/VERSION +++ b/src/servers/dashboard/VERSION @@ -1 +1 @@ -v0.7.1 +v0.7.3 diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index 6682a1c789..071de93683 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -189,6 +189,13 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to parse query"))] + FailedToParseQuery { + #[snafu(implicit)] + location: Location, + source: sql::error::Error, + }, + #[snafu(display("Failed to parse InfluxDB line protocol"))] InfluxdbLineProtocol { #[snafu(implicit)] @@ -651,7 +658,8 @@ impl ErrorExt for Error { | OpenTelemetryLog { .. } | UnsupportedJsonDataTypeForTag { .. } | InvalidTableName { .. } - | PrepareStatementNotFound { .. } => StatusCode::InvalidArguments, + | PrepareStatementNotFound { .. } + | FailedToParseQuery { .. } => StatusCode::InvalidArguments, Catalog { source, .. } => source.status_code(), RowWriter { source, .. 
} => source.status_code(), diff --git a/src/servers/src/grpc/database.rs b/src/servers/src/grpc/database.rs index 572f3c66f4..121d8c6c85 100644 --- a/src/servers/src/grpc/database.rs +++ b/src/servers/src/grpc/database.rs @@ -20,13 +20,11 @@ use common_error::status_code::StatusCode; use common_query::OutputData; use common_telemetry::{debug, warn}; use futures::StreamExt; -use tonic::metadata::{KeyAndValueRef, MetadataMap}; use tonic::{Request, Response, Status, Streaming}; use crate::grpc::greptime_handler::GreptimeRequestHandler; use crate::grpc::{cancellation, TonicResult}; - -pub const GREPTIME_DB_HEADER_HINT_PREFIX: &str = "x-greptime-hint-"; +use crate::hint_headers; pub(crate) struct DatabaseService { handler: GreptimeRequestHandler, @@ -45,7 +43,7 @@ impl GreptimeDatabase for DatabaseService { request: Request, ) -> TonicResult> { let remote_addr = request.remote_addr(); - let hints = extract_hints(request.metadata()); + let hints = hint_headers::extract_hints(request.metadata()); debug!( "GreptimeDatabase::Handle: request from {:?} with hints: {:?}", remote_addr, hints @@ -91,7 +89,7 @@ impl GreptimeDatabase for DatabaseService { request: Request>, ) -> Result, Status> { let remote_addr = request.remote_addr(); - let hints = extract_hints(request.metadata()); + let hints = hint_headers::extract_hints(request.metadata()); debug!( "GreptimeDatabase::HandleRequests: request from {:?} with hints: {:?}", remote_addr, hints @@ -142,52 +140,3 @@ impl GreptimeDatabase for DatabaseService { cancellation::with_cancellation_handler(request_future, cancellation_future).await } } - -fn extract_hints(metadata: &MetadataMap) -> Vec<(String, String)> { - metadata - .iter() - .filter_map(|kv| { - let KeyAndValueRef::Ascii(key, value) = kv else { - return None; - }; - let key = key.as_str(); - let new_key = key.strip_prefix(GREPTIME_DB_HEADER_HINT_PREFIX)?; - let Ok(value) = value.to_str() else { - // Simply return None for non-string values. - return None; - }; - Some((new_key.to_string(), value.trim().to_string())) - }) - .collect() -} - -#[cfg(test)] -mod tests { - use tonic::metadata::MetadataValue; - - use super::*; - - #[test] - fn test_extract_hints() { - let mut metadata = MetadataMap::new(); - let prev = metadata.insert( - "x-greptime-hint-append_mode", - MetadataValue::from_static("true"), - ); - metadata.insert("test-key", MetadataValue::from_static("test-value")); - assert!(prev.is_none()); - let hints = extract_hints(&metadata); - assert_eq!(hints, vec![("append_mode".to_string(), "true".to_string())]); - } - - #[test] - fn extract_hints_ignores_non_ascii_metadata() { - let mut metadata = MetadataMap::new(); - metadata.insert_bin( - "x-greptime-hint-merge_mode-bin", - MetadataValue::from_bytes(b"last_non_null"), - ); - let hints = extract_hints(&metadata); - assert!(hints.is_empty()); - } -} diff --git a/src/servers/src/hint_headers.rs b/src/servers/src/hint_headers.rs new file mode 100644 index 0000000000..6dafd45196 --- /dev/null +++ b/src/servers/src/hint_headers.rs @@ -0,0 +1,170 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use http::HeaderMap; +use tonic::metadata::MetadataMap; + +pub const HINT_KEYS: [&str; 5] = [ + "x-greptime-hint-auto_create_table", + "x-greptime-hint-ttl", + "x-greptime-hint-append_mode", + "x-greptime-hint-merge_mode", + "x-greptime-hint-physical_table", +]; + +pub(crate) fn extract_hints(headers: &T) -> Vec<(String, String)> { + let mut hints = Vec::new(); + for key in HINT_KEYS.iter() { + if let Some(value) = headers.get(key) { + let new_key = key.replace("x-greptime-hint-", ""); + hints.push((new_key, value.trim().to_string())); + } + } + hints +} + +pub(crate) trait ToHeaderMap { + fn get(&self, key: &str) -> Option<&str>; +} + +impl ToHeaderMap for MetadataMap { + fn get(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.to_str().ok()) + } +} + +impl ToHeaderMap for HeaderMap { + fn get(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.to_str().ok()) + } +} +#[cfg(test)] +mod tests { + use http::header::{HeaderMap, HeaderValue}; + use tonic::metadata::{MetadataMap, MetadataValue}; + + use super::*; + + #[test] + fn test_extract_hints_with_full_header_map() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-greptime-hint-auto_create_table", + HeaderValue::from_static("true"), + ); + headers.insert("x-greptime-hint-ttl", HeaderValue::from_static("3600d")); + headers.insert( + "x-greptime-hint-append_mode", + HeaderValue::from_static("true"), + ); + headers.insert( + "x-greptime-hint-merge_mode", + HeaderValue::from_static("false"), + ); + headers.insert( + "x-greptime-hint-physical_table", + HeaderValue::from_static("table1"), + ); + + let hints = extract_hints(&headers); + + assert_eq!(hints.len(), 5); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + assert_eq!(hints[2], ("append_mode".to_string(), "true".to_string())); + assert_eq!(hints[3], ("merge_mode".to_string(), "false".to_string())); + assert_eq!( + hints[4], + ("physical_table".to_string(), "table1".to_string()) + ); + } + + #[test] + fn test_extract_hints_with_missing_keys() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-greptime-hint-auto_create_table", + HeaderValue::from_static("true"), + ); + headers.insert("x-greptime-hint-ttl", HeaderValue::from_static("3600d")); + + let hints = extract_hints(&headers); + + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + } + + #[test] + fn test_extract_hints_with_metadata_map() { + let mut metadata = MetadataMap::new(); + metadata.insert( + "x-greptime-hint-auto_create_table", + MetadataValue::from_static("true"), + ); + metadata.insert("x-greptime-hint-ttl", MetadataValue::from_static("3600d")); + metadata.insert( + "x-greptime-hint-append_mode", + MetadataValue::from_static("true"), + ); + metadata.insert( + "x-greptime-hint-merge_mode", + MetadataValue::from_static("false"), + ); + metadata.insert( + "x-greptime-hint-physical_table", + MetadataValue::from_static("table1"), + ); + + let hints = extract_hints(&metadata); + + assert_eq!(hints.len(), 5); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + assert_eq!(hints[2], ("append_mode".to_string(), "true".to_string())); + 
assert_eq!(hints[3], ("merge_mode".to_string(), "false".to_string())); + assert_eq!( + hints[4], + ("physical_table".to_string(), "table1".to_string()) + ); + } + + #[test] + fn test_extract_hints_with_partial_metadata_map() { + let mut metadata = MetadataMap::new(); + metadata.insert( + "x-greptime-hint-auto_create_table", + MetadataValue::from_static("true"), + ); + metadata.insert("x-greptime-hint-ttl", MetadataValue::from_static("3600d")); + + let hints = extract_hints(&metadata); + + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + } +} diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index d8d07ed31f..9841f02d6e 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -92,6 +92,7 @@ mod timeout; pub(crate) use timeout::DynamicTimeoutLayer; +mod hints; #[cfg(any(test, feature = "testing"))] pub mod test_helpers; @@ -703,7 +704,8 @@ impl HttpServer { .layer(middleware::from_fn_with_state( AuthState::new(self.user_provider.clone()), authorize::check_http_auth, - )), + )) + .layer(middleware::from_fn(hints::extract_hints)), ) // Handlers for debug, we don't expect a timeout. .nest( @@ -755,6 +757,10 @@ impl HttpServer { fn route_sql(api_state: ApiState) -> Router { Router::new() .route("/sql", routing::get(handler::sql).post(handler::sql)) + .route( + "/sql/parse", + routing::get(handler::sql_parse).post(handler::sql_parse), + ) .route( "/promql", routing::get(handler::promql).post(handler::promql), diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index 69498c209a..c0926af833 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -38,7 +38,7 @@ use lazy_static::lazy_static; use loki_api::prost_types::Timestamp; use pipeline::error::PipelineTransformSnafu; use pipeline::util::to_pipeline_version; -use pipeline::PipelineVersion; +use pipeline::{GreptimeTransformer, PipelineVersion}; use prost::Message; use serde::{Deserialize, Serialize}; use serde_json::{Deserializer, Map, Value}; @@ -46,8 +46,8 @@ use session::context::{Channel, QueryContext, QueryContextRef}; use snafu::{ensure, OptionExt, ResultExt}; use crate::error::{ - DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, ParseJsonSnafu, - PipelineSnafu, Result, UnsupportedContentTypeSnafu, + CatalogSnafu, DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, + ParseJsonSnafu, PipelineSnafu, Result, UnsupportedContentTypeSnafu, }; use crate::http::extractor::LogTableName; use crate::http::header::CONTENT_TYPE_PROTOBUF_STR; @@ -276,39 +276,11 @@ fn transform_ndjson_array_factory( }) } -#[axum_macros::debug_handler] -pub async fn pipeline_dryrun( - State(log_state): State, - Query(query_params): Query, - Extension(mut query_ctx): Extension, - TypedHeader(content_type): TypedHeader, - payload: String, +/// Dryrun pipeline with given data +fn dryrun_pipeline_inner( + value: Vec, + pipeline: &pipeline::Pipeline, ) -> Result { - let handler = log_state.log_handler; - let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu { - reason: "pipeline_name is required", - })?; - - let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; - - let ignore_errors = query_params.ignore_errors.unwrap_or(false); - - let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; - - ensure!( - value.len() <= 10, - InvalidParameterSnafu { - 
reason: "too many rows for dryrun", - } - ); - - query_ctx.set_channel(Channel::Http); - let query_ctx = Arc::new(query_ctx); - - let pipeline = handler - .get_pipeline(&pipeline_name, version, query_ctx.clone()) - .await?; - let mut intermediate_state = pipeline.init_intermediate_state(); let mut results = Vec::with_capacity(value.len()); @@ -387,6 +359,110 @@ pub async fn pipeline_dryrun( Ok(Json(result).into_response()) } +/// Dryrun pipeline with given data +/// pipeline_name and pipeline_version to specify pipeline stored in db +/// pipeline to specify pipeline raw content +/// data to specify data +/// data maght be list of string or list of object +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct PipelineDryrunParams { + pub pipeline_name: Option, + pub pipeline_version: Option, + pub pipeline: Option, + pub data: Vec, +} + +/// Check if the payload is valid json +/// Check if the payload contains pipeline or pipeline_name and data +/// Return Some if valid, None if invalid +fn check_pipeline_dryrun_params_valid(payload: &str) -> Option { + match serde_json::from_str::(payload) { + // payload with pipeline or pipeline_name and data is array + Ok(params) if params.pipeline.is_some() || params.pipeline_name.is_some() => Some(params), + // because of the pipeline_name or pipeline is required + Ok(_) => None, + // invalid json + Err(_) => None, + } +} + +/// Check if the pipeline_name exists +fn check_pipeline_name_exists(pipeline_name: Option) -> Result { + pipeline_name.context(InvalidParameterSnafu { + reason: "pipeline_name is required", + }) +} + +/// Check if the data length less than 10 +fn check_data_valid(data_len: usize) -> Result<()> { + ensure!( + data_len <= 10, + InvalidParameterSnafu { + reason: "data is required", + } + ); + Ok(()) +} + +#[axum_macros::debug_handler] +pub async fn pipeline_dryrun( + State(log_state): State, + Query(query_params): Query, + Extension(mut query_ctx): Extension, + TypedHeader(content_type): TypedHeader, + payload: String, +) -> Result { + let handler = log_state.log_handler; + + match check_pipeline_dryrun_params_valid(&payload) { + Some(params) => { + let data = params.data; + + check_data_valid(data.len())?; + + match params.pipeline { + None => { + let version = + to_pipeline_version(params.pipeline_version).context(PipelineSnafu)?; + let pipeline_name = check_pipeline_name_exists(params.pipeline_name)?; + let pipeline = handler + .get_pipeline(&pipeline_name, version, Arc::new(query_ctx)) + .await?; + dryrun_pipeline_inner(data, &pipeline) + } + Some(pipeline) => { + let pipeline = handler.build_pipeline(&pipeline)?; + dryrun_pipeline_inner(data, &pipeline) + } + } + } + None => { + // This path is for back compatibility with the previous dry run code + // where the payload is just data (JSON or plain text) and the pipeline name + // is specified using query param. 
+ let pipeline_name = check_pipeline_name_exists(query_params.pipeline_name)?; + + let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; + + let ignore_errors = query_params.ignore_errors.unwrap_or(false); + + let value = + extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; + + check_data_valid(value.len())?; + + query_ctx.set_channel(Channel::Http); + let query_ctx = Arc::new(query_ctx); + + let pipeline = handler + .get_pipeline(&pipeline_name, version, query_ctx.clone()) + .await?; + + dryrun_pipeline_inner(value, &pipeline) + } + } +} + #[axum_macros::debug_handler] pub async fn loki_ingest( State(log_state): State, @@ -438,8 +514,8 @@ pub async fn loki_ingest( let line = entry.line; // create and init row - let mut row = Vec::with_capacity(schemas.capacity()); - for _ in 0..row.capacity() { + let mut row = Vec::with_capacity(schemas.len()); + for _ in 0..schemas.len() { row.push(GreptimeValue { value_data: None }); } // insert ts and line @@ -612,10 +688,15 @@ async fn ingest_logs_inner( let mut results = Vec::with_capacity(pipeline_data.len()); let transformed_data: Rows; if pipeline_name == GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME { - let rows = pipeline::identity_pipeline(pipeline_data) + let table = state + .get_table(&table_name, &query_ctx) + .await + .context(CatalogSnafu)?; + let rows = pipeline::identity_pipeline(pipeline_data, table) .context(PipelineTransformSnafu) .context(PipelineSnafu)?; - transformed_data = rows; + + transformed_data = rows } else { let pipeline = state .get_pipeline(&pipeline_name, version, query_ctx.clone()) diff --git a/src/servers/src/http/handler.rs b/src/servers/src/http/handler.rs index 15a1a0e16c..153b824d6e 100644 --- a/src/servers/src/http/handler.rs +++ b/src/servers/src/http/handler.rs @@ -30,8 +30,13 @@ use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING}; use serde::{Deserialize, Serialize}; use serde_json::Value; use session::context::{Channel, QueryContext, QueryContextRef}; +use snafu::ResultExt; +use sql::dialect::GreptimeDbDialect; +use sql::parser::{ParseOptions, ParserContext}; +use sql::statements::statement::Statement; use super::header::collect_plan_metrics; +use crate::error::{FailedToParseQuerySnafu, InvalidQuerySnafu, Result}; use crate::http::result::arrow_result::ArrowResponse; use crate::http::result::csv_result::CsvResponse; use crate::http::result::error_result::ErrorResponse; @@ -146,10 +151,31 @@ pub async fn sql( resp.with_execution_time(start.elapsed().as_millis() as u64) } +/// Handler to parse sql +#[axum_macros::debug_handler] +#[tracing::instrument(skip_all, fields(protocol = "http", request_type = "sql"))] +pub async fn sql_parse( + Query(query_params): Query, + Form(form_params): Form, +) -> Result>> { + let Some(sql) = query_params.sql.or(form_params.sql) else { + return InvalidQuerySnafu { + reason: "sql parameter is required.", + } + .fail(); + }; + + let stmts = + ParserContext::create_with_dialect(&sql, &GreptimeDbDialect {}, ParseOptions::default()) + .context(FailedToParseQuerySnafu)?; + + Ok(stmts.into()) +} + /// Create a response from query result pub async fn from_output( outputs: Vec>, -) -> Result<(Vec, HashMap), ErrorResponse> { +) -> std::result::Result<(Vec, HashMap), ErrorResponse> { // TODO(sunng87): this api response structure cannot represent error well. 
// It hides successful execution results from error response let mut results = Vec::with_capacity(outputs.len()); diff --git a/src/servers/src/http/hints.rs b/src/servers/src/http/hints.rs new file mode 100644 index 0000000000..4612201880 --- /dev/null +++ b/src/servers/src/http/hints.rs @@ -0,0 +1,30 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use axum::http::Request; +use axum::middleware::Next; +use axum::response::Response; +use session::context::QueryContext; + +use crate::hint_headers; + +pub async fn extract_hints(mut request: Request, next: Next) -> Response { + let hints = hint_headers::extract_hints(request.headers()); + if let Some(query_ctx) = request.extensions_mut().get_mut::() { + for (key, value) in hints { + query_ctx.set_extension(key, value); + } + } + next.run(request).await +} diff --git a/src/servers/src/lib.rs b/src/servers/src/lib.rs index ce6857c6d2..92f2b8b9d0 100644 --- a/src/servers/src/lib.rs +++ b/src/servers/src/lib.rs @@ -27,6 +27,7 @@ pub mod error; pub mod export_metrics; pub mod grpc; pub mod heartbeat_options; +mod hint_headers; pub mod http; pub mod influxdb; pub mod interceptor; diff --git a/src/servers/src/query_handler.rs b/src/servers/src/query_handler.rs index 58812e9350..ff92d3c5d1 100644 --- a/src/servers/src/query_handler.rs +++ b/src/servers/src/query_handler.rs @@ -39,7 +39,7 @@ use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequ use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest; use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion, PipelineWay}; use serde_json::Value; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextRef}; use crate::error::Result; use crate::influxdb::InfluxdbRequest; @@ -164,4 +164,13 @@ pub trait PipelineHandler { version: PipelineVersion, query_ctx: QueryContextRef, ) -> Result>; + + async fn get_table( + &self, + table: &str, + query_ctx: &QueryContext, + ) -> std::result::Result>, catalog::error::Error>; + + //// Build a pipeline from a string. 
+ fn build_pipeline(&self, pipeline: &str) -> Result>; } diff --git a/src/session/src/lib.rs b/src/session/src/lib.rs index f553fef58c..c018d47ebc 100644 --- a/src/session/src/lib.rs +++ b/src/session/src/lib.rs @@ -97,10 +97,6 @@ impl Session { &self.conn_info } - pub fn mut_conn_info(&mut self) -> &mut ConnInfo { - &mut self.conn_info - } - pub fn timezone(&self) -> Timezone { self.mutable_inner.read().unwrap().timezone.clone() } diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index e3340a8f6c..3cb81d6dd4 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -30,6 +30,7 @@ itertools.workspace = true jsonb.workspace = true lazy_static.workspace = true regex.workspace = true +serde.workspace = true serde_json.workspace = true snafu.workspace = true sqlparser.workspace = true diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index d05ccf8e54..09b8eb5cad 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -326,6 +326,13 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to set SKIPPING index option"))] + SetSkippingIndexOption { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Datatype error: {}", source))] Datatype { source: datatypes::error::Error, @@ -375,7 +382,7 @@ impl ErrorExt for Error { ConvertSqlValue { .. } | ConvertValue { .. } => StatusCode::Unsupported, PermissionDenied { .. } => StatusCode::PermissionDenied, - SetFulltextOption { .. } => StatusCode::Unexpected, + SetFulltextOption { .. } | SetSkippingIndexOption { .. } => StatusCode::Unexpected, } } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index bb9aadadb7..f40ecb7b6e 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -36,7 +36,9 @@ use crate::error::{ SyntaxSnafu, UnexpectedSnafu, UnsupportedSnafu, }; use crate::parser::{ParserContext, FLOW}; -use crate::parsers::utils::validate_column_fulltext_create_option; +use crate::parsers::utils::{ + validate_column_fulltext_create_option, validate_column_skipping_index_create_option, +}; use crate::statements::create::{ Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM, @@ -53,6 +55,7 @@ pub const SINK: &str = "SINK"; pub const EXPIRE: &str = "EXPIRE"; pub const AFTER: &str = "AFTER"; pub const INVERTED: &str = "INVERTED"; +pub const SKIPPING: &str = "SKIPPING"; const DB_OPT_KEY_TTL: &str = "ttl"; @@ -701,6 +704,49 @@ impl<'a> ParserContext<'a> { column_extensions.vector_options = Some(options.into()); } + let mut is_index_declared = false; + + if let Token::Word(word) = parser.peek_token().token + && word.value.eq_ignore_ascii_case(SKIPPING) + { + parser.next_token(); + // Consume `INDEX` keyword + ensure!( + parser.parse_keyword(Keyword::INDEX), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "expect INDEX after SKIPPING keyword", + } + ); + ensure!( + column_extensions.skipping_index_options.is_none(), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "duplicated SKIPPING index option", + } + ); + + let options = parser + .parse_options(Keyword::WITH) + .context(error::SyntaxSnafu)? 
+ .into_iter() + .map(parse_option_string) + .collect::>>()?; + + for key in options.keys() { + ensure!( + validate_column_skipping_index_create_option(key), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: format!("invalid SKIP option: {key}"), + } + ); + } + + column_extensions.skipping_index_options = Some(options.into()); + is_index_declared |= true; + } + if parser.parse_keyword(Keyword::FULLTEXT) { ensure!( column_extensions.fulltext_options.is_none(), @@ -738,10 +784,10 @@ impl<'a> ParserContext<'a> { } column_extensions.fulltext_options = Some(options.into()); - Ok(true) - } else { - Ok(false) + is_index_declared |= true; } + + Ok(is_index_declared) } fn parse_optional_table_constraint(&mut self) -> Result> { @@ -2103,6 +2149,57 @@ CREATE TABLE log ( .contains("invalid FULLTEXT option")); } + #[test] + fn test_parse_create_table_skip_options() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX WITH (granularity='8192', type='bloom'), +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(!col + .extensions + .skipping_index_options + .as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + + let sql = r" + CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX, + )"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(col + .extensions + .skipping_index_options + .as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + } + #[test] fn test_parse_create_view_with_columns() { let sql = "CREATE VIEW test () AS SELECT * FROM NUMBERS"; diff --git a/src/sql/src/parsers/show_parser.rs b/src/sql/src/parsers/show_parser.rs index d1530c1fcb..fa31e813f3 100644 --- a/src/sql/src/parsers/show_parser.rs +++ b/src/sql/src/parsers/show_parser.rs @@ -21,9 +21,9 @@ use crate::error::{ }; use crate::parser::ParserContext; use crate::statements::show::{ - ShowColumns, ShowCreateDatabase, ShowCreateFlow, ShowCreateTable, ShowCreateView, - ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowStatus, ShowTableStatus, ShowTables, - ShowVariables, ShowViews, + ShowColumns, ShowCreateDatabase, ShowCreateFlow, ShowCreateTable, ShowCreateTableVariant, + ShowCreateView, ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowStatus, ShowTableStatus, + ShowTables, ShowVariables, ShowViews, }; use crate::statements::statement::Statement; @@ -146,7 +146,19 @@ impl ParserContext<'_> { name: table_name.to_string(), } ); - Ok(Statement::ShowCreateTable(ShowCreateTable { table_name })) + let mut variant = ShowCreateTableVariant::Original; + if self.consume_token("FOR") { + if self.consume_token("POSTGRES_FOREIGN_TABLE") { + variant = ShowCreateTableVariant::PostgresForeignTable; + } else { + self.unsupported(self.peek_token_as_string())?; + } + } + + Ok(Statement::ShowCreateTable(ShowCreateTable { + table_name, + variant, + })) } fn parse_show_create_flow(&mut self) -> Result { diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index ae5146d7ee..f7eefc4b95 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ 
-26,7 +26,10 @@ use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; -use datatypes::schema::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; +use datatypes::schema::{ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, +}; use snafu::ResultExt; use crate::error::{ @@ -119,3 +122,11 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool { ] .contains(&key) } + +pub fn validate_column_skipping_index_create_option(key: &str) -> bool { + [ + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + ] + .contains(&key) +} diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 25cc3bf7e5..90db401cba 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -34,10 +34,8 @@ pub mod truncate; use std::str::FromStr; use api::helper::ColumnDataTypeWrapper; -use api::v1::add_column_location::LocationType; -use api::v1::{AddColumnLocation as Location, SemanticType}; +use api::v1::SemanticType; use common_base::bytes::Bytes; -use common_query::AddColumnLocation; use common_time::timezone::Timezone; use common_time::Timestamp; use datatypes::prelude::ConcreteDataType; @@ -58,7 +56,8 @@ use crate::error::{ self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu, ParseSqlValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, - TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu, + SetSkippingIndexOptionSnafu, TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, + UnsupportedUnaryOpSnafu, }; use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; @@ -513,6 +512,12 @@ pub fn column_to_schema( .context(SetFulltextOptionSnafu)?; } + if let Some(options) = column.extensions.build_skipping_index_options()? { + column_schema = column_schema + .with_skipping_options(options) + .context(SetSkippingIndexOptionSnafu)?; + } + Ok(column_schema) } @@ -681,22 +686,6 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu } } -pub fn sql_location_to_grpc_add_column_location( - location: &Option, -) -> Option { - match location { - Some(AddColumnLocation::First) => Some(Location { - location_type: LocationType::First.into(), - after_column_name: String::default(), - }), - Some(AddColumnLocation::After { column_name }) => Some(Location { - location_type: LocationType::After.into(), - after_column_name: column_name.to_string(), - }), - None => None, - } -} - #[cfg(test)] mod tests { use std::assert_matches::assert_matches; @@ -1519,6 +1508,7 @@ mod tests { .into(), ), vector_options: None, + skipping_index_options: None, }, }; diff --git a/src/sql/src/statements/admin.rs b/src/sql/src/statements/admin.rs index bbe805a4c1..ed068ea475 100644 --- a/src/sql/src/statements/admin.rs +++ b/src/sql/src/statements/admin.rs @@ -14,12 +14,13 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::Function; /// `ADMIN` statement to execute some administration commands. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Admin { /// Run a admin function. Func(Function), diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index cf59257e89..df148ae5b6 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -18,10 +18,11 @@ use api::v1; use common_query::AddColumnLocation; use datatypes::schema::FulltextOptions; use itertools::Itertools; +use serde::Serialize; use sqlparser::ast::{ColumnDef, DataType, Ident, ObjectName, TableConstraint}; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct AlterTable { pub table_name: ObjectName, pub alter_operation: AlterTableOperation, @@ -56,7 +57,7 @@ impl Display for AlterTable { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), @@ -71,29 +72,20 @@ pub enum AlterTableOperation { target_type: DataType, }, /// `SET =
<table attr value>` - SetTableOptions { - options: Vec<KeyValueOption>, - }, - UnsetTableOptions { - keys: Vec<String>, - }, + SetTableOptions { options: Vec<KeyValueOption> }, + /// `UNSET <table attrs key>
` + UnsetTableOptions { keys: Vec }, /// `DROP COLUMN ` - DropColumn { - name: Ident, - }, + DropColumn { name: Ident }, /// `RENAME ` - RenameTable { - new_table_name: String, - }, + RenameTable { new_table_name: String }, /// `MODIFY COLUMN SET FULLTEXT [WITH ]` SetColumnFulltext { column_name: Ident, options: FulltextOptions, }, /// `MODIFY COLUMN UNSET FULLTEXT` - UnsetColumnFulltext { - column_name: Ident, - }, + UnsetColumnFulltext { column_name: Ident }, } impl Display for AlterTableOperation { @@ -151,7 +143,7 @@ impl Display for AlterTableOperation { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct KeyValueOption { pub key: String, pub value: String, @@ -166,7 +158,7 @@ impl From for v1::Option { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct AlterDatabase { pub database_name: ObjectName, pub alter_operation: AlterDatabaseOperation, @@ -197,7 +189,7 @@ impl Display for AlterDatabase { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum AlterDatabaseOperation { SetDatabaseOption { options: Vec }, UnsetDatabaseOption { keys: Vec }, diff --git a/src/sql/src/statements/copy.rs b/src/sql/src/statements/copy.rs index c68b9d8c03..436d86d3ab 100644 --- a/src/sql/src/statements/copy.rs +++ b/src/sql/src/statements/copy.rs @@ -14,12 +14,13 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; use crate::statements::OptionMap; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Copy { CopyTable(CopyTable), CopyDatabase(CopyDatabase), @@ -34,7 +35,7 @@ impl Display for Copy { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum CopyTable { To(CopyTableArgument), From(CopyTableArgument), @@ -65,7 +66,7 @@ impl Display for CopyTable { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum CopyDatabase { To(CopyDatabaseArgument), From(CopyDatabaseArgument), @@ -96,7 +97,7 @@ impl Display for CopyDatabase { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CopyDatabaseArgument { pub database_name: ObjectName, pub with: OptionMap, @@ -104,7 +105,7 @@ pub struct CopyDatabaseArgument { pub location: String, } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CopyTableArgument { pub table_name: ObjectName, pub with: OptionMap, diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 20ed7b5559..3ea265fb7f 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -16,14 +16,15 @@ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; -use datatypes::schema::FulltextOptions; +use datatypes::schema::{FulltextOptions, SkippingIndexOptions}; use itertools::Itertools; +use serde::Serialize; use snafu::ResultExt; use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, 
Ident, ObjectName, Value as SqlValue}; -use crate::error::{Result, SetFulltextOptionSnafu}; +use crate::error::{Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu}; use crate::statements::statement::Statement; use crate::statements::OptionMap; @@ -58,7 +59,7 @@ fn format_table_constraint(constraints: &[TableConstraint]) -> String { } /// Table constraint for create table statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub enum TableConstraint { /// Primary key constraint. PrimaryKey { columns: Vec }, @@ -84,7 +85,7 @@ impl Display for TableConstraint { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateTable { /// Create if not exists pub if_not_exists: bool, @@ -100,7 +101,7 @@ pub struct CreateTable { } /// Column definition in `CREATE TABLE` statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct Column { /// `ColumnDef` from `sqlparser::ast` pub column_def: ColumnDef, @@ -109,12 +110,14 @@ pub struct Column { } /// Column extensions for greptimedb dialect. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default, Serialize)] pub struct ColumnExtensions { /// Fulltext options. pub fulltext_options: Option, /// Vector options. pub vector_options: Option, + /// Skipping index options. + pub skipping_index_options: Option, } impl Column { @@ -157,6 +160,15 @@ impl Display for Column { write!(f, " FULLTEXT")?; } } + + if let Some(skipping_index_options) = &self.extensions.skipping_index_options { + if !skipping_index_options.is_empty() { + let options = skipping_index_options.kv_pairs(); + write!(f, " SKIPPING INDEX WITH({})", format_list_comma!(options))?; + } else { + write!(f, " SKIPPING INDEX")?; + } + } Ok(()) } } @@ -170,9 +182,20 @@ impl ColumnExtensions { let options: HashMap = options.clone().into_map(); Ok(Some(options.try_into().context(SetFulltextOptionSnafu)?)) } + + pub fn build_skipping_index_options(&self) -> Result> { + let Some(options) = self.skipping_index_options.as_ref() else { + return Ok(None); + }; + + let options: HashMap = options.clone().into_map(); + Ok(Some( + options.try_into().context(SetSkippingIndexOptionSnafu)?, + )) + } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct Partitions { pub column_list: Vec, pub exprs: Vec, @@ -244,7 +267,7 @@ impl Display for CreateTable { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateDatabase { pub name: ObjectName, /// Create if not exists @@ -278,7 +301,7 @@ impl Display for CreateDatabase { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateExternalTable { /// Table name pub name: ObjectName, @@ -309,7 +332,7 @@ impl Display for CreateExternalTable { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateTableLike { /// Table name pub table_name: ObjectName, @@ -325,7 +348,7 @@ impl Display for CreateTableLike { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, 
Clone, Visit, VisitMut, Serialize)] pub struct CreateFlow { /// Flow name pub flow_name: ObjectName, @@ -367,7 +390,7 @@ impl Display for CreateFlow { } /// Create SQL view statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateView { /// View name pub name: ObjectName, diff --git a/src/sql/src/statements/cursor.rs b/src/sql/src/statements/cursor.rs index 72ef4cdcae..4381cc5e7b 100644 --- a/src/sql/src/statements/cursor.rs +++ b/src/sql/src/statements/cursor.rs @@ -14,6 +14,7 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; @@ -22,7 +23,7 @@ use super::query::Query; /// Represents a DECLARE CURSOR statement /// /// This statement will carry a SQL query -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DeclareCursor { pub cursor_name: ObjectName, pub query: Box, @@ -35,7 +36,7 @@ impl Display for DeclareCursor { } /// Represents a FETCH FROM cursor statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct FetchCursor { pub cursor_name: ObjectName, pub fetch_size: u64, @@ -48,7 +49,7 @@ impl Display for FetchCursor { } /// Represents a CLOSE cursor statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CloseCursor { pub cursor_name: ObjectName, } diff --git a/src/sql/src/statements/delete.rs b/src/sql/src/statements/delete.rs index 4346610b7d..dc8f5d6901 100644 --- a/src/sql/src/statements/delete.rs +++ b/src/sql/src/statements/delete.rs @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use serde::Serialize; use sqlparser::ast::Statement; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Delete { pub inner: Statement, } diff --git a/src/sql/src/statements/describe.rs b/src/sql/src/statements/describe.rs index 743f2b0123..1a7bba24e5 100644 --- a/src/sql/src/statements/describe.rs +++ b/src/sql/src/statements/describe.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// SQL structure for `DESCRIBE TABLE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DescribeTable { name: ObjectName, } diff --git a/src/sql/src/statements/drop.rs b/src/sql/src/statements/drop.rs index a46450db78..799722904d 100644 --- a/src/sql/src/statements/drop.rs +++ b/src/sql/src/statements/drop.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// DROP TABLE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropTable { table_names: Vec, @@ -62,7 +63,7 @@ impl Display for DropTable { } /// DROP DATABASE statement. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropDatabase { name: ObjectName, /// drop table if exists @@ -99,7 +100,7 @@ impl Display for DropDatabase { } /// DROP FLOW statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropFlow { flow_name: ObjectName, /// drop flow if exists @@ -138,7 +139,7 @@ impl Display for DropFlow { } /// `DROP VIEW` statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropView { // The view name pub view_name: ObjectName, diff --git a/src/sql/src/statements/explain.rs b/src/sql/src/statements/explain.rs index 5b3a2671f9..96a12c7a41 100644 --- a/src/sql/src/statements/explain.rs +++ b/src/sql/src/statements/explain.rs @@ -14,13 +14,14 @@ use std::fmt::{Display, Formatter}; +use serde::Serialize; use sqlparser::ast::Statement as SpStatement; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; /// Explain statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Explain { pub inner: SpStatement, } diff --git a/src/sql/src/statements/insert.rs b/src/sql/src/statements/insert.rs index 4eae7f1e18..f1c0b71444 100644 --- a/src/sql/src/statements/insert.rs +++ b/src/sql/src/statements/insert.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use serde::Serialize; use sqlparser::ast::{ObjectName, Query, SetExpr, Statement, UnaryOperator, Values}; use sqlparser::parser::ParserError; use sqlparser_derive::{Visit, VisitMut}; @@ -20,7 +21,7 @@ use crate::ast::{Expr, Value}; use crate::error::Result; use crate::statements::query::Query as GtQuery; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Insert { // Can only be sqlparser::ast::Statement::Insert variant pub inner: Statement, diff --git a/src/sql/src/statements/option_map.rs b/src/sql/src/statements/option_map.rs index 9ff8d94312..d66cadf164 100644 --- a/src/sql/src/statements/option_map.rs +++ b/src/sql/src/statements/option_map.rs @@ -16,14 +16,16 @@ use std::collections::{BTreeMap, HashMap}; use std::ops::ControlFlow; use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString}; +use serde::Serialize; use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut}; const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"]; /// Options hashmap. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Serialize)] pub struct OptionMap { options: BTreeMap, + #[serde(skip_serializing)] secrets: BTreeMap, } diff --git a/src/sql/src/statements/query.rs b/src/sql/src/statements/query.rs index 3b571a1a0b..b5221a2263 100644 --- a/src/sql/src/statements/query.rs +++ b/src/sql/src/statements/query.rs @@ -14,13 +14,14 @@ use std::fmt; +use serde::Serialize; use sqlparser::ast::Query as SpQuery; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; /// Query statement instance. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Query { pub inner: SpQuery, } diff --git a/src/sql/src/statements/set_variables.rs b/src/sql/src/statements/set_variables.rs index 7a2a94a531..748d077d84 100644 --- a/src/sql/src/statements/set_variables.rs +++ b/src/sql/src/statements/set_variables.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::{Expr, ObjectName}; use sqlparser_derive::{Visit, VisitMut}; /// SET variables statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct SetVariables { pub variable: ObjectName, pub value: Vec, diff --git a/src/sql/src/statements/show.rs b/src/sql/src/statements/show.rs index f6a8dab728..92f13422e6 100644 --- a/src/sql/src/statements/show.rs +++ b/src/sql/src/statements/show.rs @@ -14,12 +14,13 @@ use std::fmt::{self, Display}; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{Expr, Ident, ObjectName}; /// Show kind for SQL expressions like `SHOW DATABASE` or `SHOW TABLE` -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum ShowKind { All, Like(Ident), @@ -46,14 +47,14 @@ macro_rules! format_kind { } /// SQL structure for `SHOW DATABASES`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowDatabases { pub kind: ShowKind, pub full: bool, } /// The SQL `SHOW COLUMNS` statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowColumns { pub kind: ShowKind, pub table: String, @@ -77,7 +78,7 @@ impl Display for ShowColumns { } /// The SQL `SHOW INDEX` statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowIndex { pub kind: ShowKind, pub table: String, @@ -118,7 +119,7 @@ impl Display for ShowDatabases { } /// SQL structure for `SHOW TABLES`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowTables { pub kind: ShowKind, pub database: Option, @@ -142,7 +143,7 @@ impl Display for ShowTables { } /// SQL structure for `SHOW TABLE STATUS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowTableStatus { pub kind: ShowKind, pub database: Option, @@ -162,7 +163,7 @@ impl Display for ShowTableStatus { } /// SQL structure for `SHOW CREATE DATABASE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateDatabase { pub database_name: ObjectName, } @@ -175,20 +176,34 @@ impl Display for ShowCreateDatabase { } /// SQL structure for `SHOW CREATE TABLE`. 
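The struct below gains a `variant` field so the same statement can also be rendered as a Postgres foreign-table definition. A minimal sketch of how the parsed variant is told apart, reusing the parser entry points exercised in the tests further down (error handling elided):

    let sql = "SHOW CREATE TABLE test FOR POSTGRES_FOREIGN_TABLE";
    let stmts =
        ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
            .unwrap();
    match &stmts[0] {
        // A plain `SHOW CREATE TABLE test` yields ShowCreateTableVariant::Original instead.
        Statement::ShowCreateTable(show) => {
            assert_eq!(show.variant, ShowCreateTableVariant::PostgresForeignTable)
        }
        _ => unreachable!(),
    }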
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateTable { pub table_name: ObjectName, + pub variant: ShowCreateTableVariant, +} + +/// Variant of a show create table +#[derive(Default, Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub enum ShowCreateTableVariant { + #[default] + Original, + PostgresForeignTable, } impl Display for ShowCreateTable { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let table_name = &self.table_name; - write!(f, r#"SHOW CREATE TABLE {table_name}"#) + write!(f, r#"SHOW CREATE TABLE {table_name}"#)?; + if let ShowCreateTableVariant::PostgresForeignTable = self.variant { + write!(f, " FOR POSTGRES_FOREIGN_TABLE")?; + } + + Ok(()) } } /// SQL structure for `SHOW CREATE FLOW`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateFlow { pub flow_name: ObjectName, } @@ -201,7 +216,7 @@ impl Display for ShowCreateFlow { } /// SQL structure for `SHOW FLOWS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowFlows { pub kind: ShowKind, pub database: Option, @@ -220,7 +235,7 @@ impl Display for ShowFlows { } /// SQL structure for `SHOW CREATE VIEW`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateView { pub view_name: ObjectName, } @@ -233,7 +248,7 @@ impl Display for ShowCreateView { } /// SQL structure for `SHOW VIEWS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowViews { pub kind: ShowKind, pub database: Option, @@ -252,7 +267,7 @@ impl Display for ShowViews { } /// SQL structure for `SHOW VARIABLES xxx`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowVariables { pub variable: ObjectName, } @@ -265,7 +280,7 @@ impl Display for ShowVariables { } /// SQL structure for "SHOW STATUS" -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowStatus {} impl Display for ShowStatus { @@ -343,12 +358,31 @@ mod tests { Statement::ShowCreateTable(show) => { let table_name = show.table_name.to_string(); assert_eq!(table_name, "test"); + assert_eq!(show.variant, ShowCreateTableVariant::Original); + } + _ => { + unreachable!(); + } + } + + let sql = "SHOW CREATE TABLE test FOR POSTGRES_FOREIGN_TABLE"; + let stmts: Vec = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(1, stmts.len()); + assert_matches!(&stmts[0], Statement::ShowCreateTable { .. 
}); + match &stmts[0] { + Statement::ShowCreateTable(show) => { + let table_name = show.table_name.to_string(); + assert_eq!(table_name, "test"); + assert_eq!(show.variant, ShowCreateTableVariant::PostgresForeignTable); } _ => { unreachable!(); } } } + #[test] pub fn test_show_create_missing_table_name() { let sql = "SHOW CREATE TABLE"; @@ -360,6 +394,17 @@ mod tests { .is_err()); } + #[test] + pub fn test_show_create_unknown_for() { + let sql = "SHOW CREATE TABLE t FOR UNKNOWN"; + assert!(ParserContext::create_with_dialect( + sql, + &GreptimeDbDialect {}, + ParseOptions::default() + ) + .is_err()); + } + #[test] pub fn test_show_create_flow() { let sql = "SHOW CREATE FLOW test"; diff --git a/src/sql/src/statements/statement.rs b/src/sql/src/statements/statement.rs index 8ad391a00d..2870f2b64a 100644 --- a/src/sql/src/statements/statement.rs +++ b/src/sql/src/statements/statement.rs @@ -15,12 +15,14 @@ use std::fmt::Display; use datafusion_sql::parser::Statement as DfStatement; +use serde::Serialize; use sqlparser::ast::Statement as SpStatement; use sqlparser_derive::{Visit, VisitMut}; use crate::error::{ConvertToDfStatementSnafu, Error}; use crate::statements::admin::Admin; use crate::statements::alter::{AlterDatabase, AlterTable}; +use crate::statements::copy::Copy; use crate::statements::create::{ CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, }; @@ -42,7 +44,7 @@ use crate::statements::truncate::TruncateTable; /// Tokens parsed by `DFParser` are converted into these values. #[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Statement { // Query Query(Box), @@ -107,7 +109,8 @@ pub enum Statement { // EXPLAIN QUERY Explain(Explain), // COPY - Copy(crate::statements::copy::Copy), + Copy(Copy), + // Telemetry Query Language Tql(Tql), // TRUNCATE TABLE TruncateTable(TruncateTable), diff --git a/src/sql/src/statements/tql.rs b/src/sql/src/statements/tql.rs index 0f7a85f95a..7980103431 100644 --- a/src/sql/src/statements/tql.rs +++ b/src/sql/src/statements/tql.rs @@ -14,9 +14,10 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Tql { Eval(TqlEval), Explain(TqlExplain), @@ -49,7 +50,7 @@ fn format_tql( } /// TQL EVAL (, , , [lookback]) -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlEval { pub start: String, pub end: String, @@ -74,7 +75,7 @@ impl Display for TqlEval { /// TQL EXPLAIN [VERBOSE] [, , , [lookback]] /// doesn't execute the query but tells how the query would be executed (similar to SQL EXPLAIN). -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlExplain { pub start: String, pub end: String, @@ -103,7 +104,7 @@ impl Display for TqlExplain { /// TQL ANALYZE [VERBOSE] (, , , [lookback]) /// executes the plan and tells the detailed per-step execution time (similar to SQL ANALYZE). 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlAnalyze { pub start: String, pub end: String, diff --git a/src/sql/src/statements/transform/type_alias.rs b/src/sql/src/statements/transform/type_alias.rs index 9e51ca9180..d670a63b47 100644 --- a/src/sql/src/statements/transform/type_alias.rs +++ b/src/sql/src/statements/transform/type_alias.rs @@ -57,6 +57,10 @@ impl TransformRule for TypeAliasTransformRule { alter_table.alter_operation_mut() { replace_type_alias(target_type) + } else if let AlterTableOperation::AddColumn { column_def, .. } = + alter_table.alter_operation_mut() + { + replace_type_alias(&mut column_def.data_type); } } _ => {} diff --git a/src/sql/src/statements/truncate.rs b/src/sql/src/statements/truncate.rs index c1a063f959..710b5f72df 100644 --- a/src/sql/src/statements/truncate.rs +++ b/src/sql/src/statements/truncate.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// TRUNCATE TABLE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TruncateTable { table_name: ObjectName, } diff --git a/src/table/src/predicate.rs b/src/table/src/predicate.rs index 267f60b108..1fd5cdcbd3 100644 --- a/src/table/src/predicate.rs +++ b/src/table/src/predicate.rs @@ -135,21 +135,17 @@ impl Predicate { // since it requires query engine to convert sql to filters. /// `build_time_range_predicate` extracts time range from logical exprs to facilitate fast /// time range pruning. -pub fn build_time_range_predicate<'a>( - ts_col_name: &'a str, +pub fn build_time_range_predicate( + ts_col_name: &str, ts_col_unit: TimeUnit, - filters: &'a mut Vec, + filters: &[Expr], ) -> TimestampRange { let mut res = TimestampRange::min_to_max(); - let mut filters_remain = vec![]; - for expr in std::mem::take(filters) { - if let Some(range) = extract_time_range_from_expr(ts_col_name, ts_col_unit, &expr) { + for expr in filters { + if let Some(range) = extract_time_range_from_expr(ts_col_name, ts_col_unit, expr) { res = res.and(&range); - } else { - filters_remain.push(expr); } } - *filters = filters_remain; res } @@ -392,7 +388,7 @@ mod tests { fn check_build_predicate(expr: Expr, expect: TimestampRange) { assert_eq!( expect, - build_time_range_predicate("ts", TimeUnit::Millisecond, &mut vec![expr]) + build_time_range_predicate("ts", TimeUnit::Millisecond, &[expr]) ); } diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index cbac9df713..c408992bd5 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -18,6 +18,7 @@ unstable = ["nix"] arbitrary = { version = "1.3.0", features = ["derive"] } async-trait = { workspace = true } chrono = { workspace = true } +common-base = { workspace = true } common-error = { workspace = true } common-macro = { workspace = true } common-query = { workspace = true } @@ -67,14 +68,14 @@ dotenv.workspace = true [[bin]] name = "fuzz_create_table" -path = "targets/fuzz_create_table.rs" +path = "targets/ddl/fuzz_create_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_create_logical_table" -path = "targets/fuzz_create_logical_table.rs" +path = "targets/ddl/fuzz_create_logical_table.rs" test = false bench = false doc = false @@ -95,21 +96,21 @@ doc = false [[bin]] name = "fuzz_alter_table" -path = "targets/fuzz_alter_table.rs" +path = "targets/ddl/fuzz_alter_table.rs" test = 
false bench = false doc = false [[bin]] name = "fuzz_alter_logical_table" -path = "targets/fuzz_alter_logical_table.rs" +path = "targets/ddl/fuzz_alter_logical_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_create_database" -path = "targets/fuzz_create_database.rs" +path = "targets/ddl/fuzz_create_database.rs" test = false bench = false doc = false diff --git a/tests-fuzz/src/context.rs b/tests-fuzz/src/context.rs index 8cfd0ca9fa..d0d5dee72d 100644 --- a/tests-fuzz/src/context.rs +++ b/tests-fuzz/src/context.rs @@ -21,7 +21,7 @@ use snafu::{ensure, OptionExt}; use crate::error::{self, Result}; use crate::generator::Random; -use crate::ir::alter_expr::AlterTableOperation; +use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption}; use crate::ir::{AlterTableExpr, Column, CreateTableExpr, Ident}; pub type TableContextRef = Arc; @@ -35,6 +35,7 @@ pub struct TableContext { // GreptimeDB specific options pub partition: Option, pub primary_keys: Vec, + pub table_options: Vec, } impl From<&CreateTableExpr> for TableContext { @@ -52,6 +53,7 @@ impl From<&CreateTableExpr> for TableContext { columns: columns.clone(), partition: partition.clone(), primary_keys: primary_keys.clone(), + table_options: vec![], } } } @@ -64,7 +66,7 @@ impl TableContext { /// Applies the [AlterTableExpr]. pub fn alter(mut self, expr: AlterTableExpr) -> Result { - match expr.alter_options { + match expr.alter_kinds { AlterTableOperation::AddColumn { column, location } => { ensure!( !self.columns.iter().any(|col| col.name == column.name), @@ -140,6 +142,25 @@ impl TableContext { } Ok(self) } + AlterTableOperation::SetTableOptions { options } => { + for option in options { + if let Some(idx) = self + .table_options + .iter() + .position(|opt| opt.key() == option.key()) + { + self.table_options[idx] = option; + } else { + self.table_options.push(option); + } + } + Ok(self) + } + AlterTableOperation::UnsetTableOptions { keys } => { + self.table_options + .retain(|opt| !keys.contains(&opt.key().to_string())); + Ok(self) + } } } @@ -171,10 +192,11 @@ impl TableContext { #[cfg(test)] mod tests { use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::TableContext; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column, Ident}; @@ -185,11 +207,12 @@ mod tests { columns: vec![], partition: None, primary_keys: vec![], + table_options: vec![], }; // Add a column let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "a".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -205,7 +228,7 @@ mod tests { // Add a column at first let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "b".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -221,7 +244,7 @@ mod tests { // Add a column after "b" let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "c".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -239,10 +262,32 @@ mod tests { // Drop the column "b" let expr = 
AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::DropColumn { name: "b".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "b".into() }, }; let table_ctx = table_ctx.alter(expr).unwrap(); assert_eq!(table_ctx.columns[1].name, Ident::new("a")); assert_eq!(table_ctx.primary_keys, vec![0, 1]); + + // Set table options + let ttl_option = AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))); + let expr = AlterTableExpr { + table_name: "foo".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ttl_option.clone()], + }, + }; + let table_ctx = table_ctx.alter(expr).unwrap(); + assert_eq!(table_ctx.table_options.len(), 1); + assert_eq!(table_ctx.table_options[0], ttl_option); + + // Unset table options + let expr = AlterTableExpr { + table_name: "foo".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec![ttl_option.key().to_string()], + }, + }; + let table_ctx = table_ctx.alter(expr).unwrap(); + assert_eq!(table_ctx.table_options.len(), 0); } } diff --git a/tests-fuzz/src/generator/alter_expr.rs b/tests-fuzz/src/generator/alter_expr.rs index 03aed702fb..0c5a628999 100644 --- a/tests-fuzz/src/generator/alter_expr.rs +++ b/tests-fuzz/src/generator/alter_expr.rs @@ -14,17 +14,19 @@ use std::marker::PhantomData; +use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; use datatypes::data_type::ConcreteDataType; use derive_builder::Builder; use rand::Rng; use snafu::ensure; +use strum::IntoEnumIterator; use crate::context::TableContextRef; use crate::error::{self, Error, Result}; use crate::fake::WordGenerator; use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Generator, Random}; -use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation}; +use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{ droppable_columns, generate_columns, generate_random_value, modifiable_columns, Column, @@ -107,7 +109,7 @@ impl Generator for AlterExprAddColumnGenera .remove(0); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::AddColumn { column, location }, + alter_kinds: AlterTableOperation::AddColumn { column, location }, }) } } @@ -130,7 +132,7 @@ impl Generator for AlterExprDropColumnGenerator { let name = droppable[rng.gen_range(0..droppable.len())].name.clone(); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::DropColumn { name }, + alter_kinds: AlterTableOperation::DropColumn { name }, }) } } @@ -153,7 +155,7 @@ impl Generator for AlterExprRenameGenerator { .generate_unique_table_name(rng, self.name_generator.as_ref()); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::RenameTable { new_table_name }, + alter_kinds: AlterTableOperation::RenameTable { new_table_name }, }) } } @@ -180,7 +182,7 @@ impl Generator for AlterExprModifyDataTypeGenerator Generator for AlterExprModifyDataTypeGenerator { + table_ctx: TableContextRef, + #[builder(default)] + _phantom: PhantomData, +} + +impl Generator for AlterExprSetTableOptionsGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + let all_options = AlterTableOption::iter().collect::>(); + // Generate random distinct options + let mut option_templates_idx = vec![]; + for _ in 1..rng.gen_range(2..=all_options.len()) { + let option = 
rng.gen_range(0..all_options.len()); + if !option_templates_idx.contains(&option) { + option_templates_idx.push(option); + } + } + let options = option_templates_idx + .iter() + .map(|idx| match all_options[*idx] { + AlterTableOption::Ttl(_) => { + let ttl_type = rng.gen_range(0..3); + match ttl_type { + 0 => { + let duration: u32 = rng.gen(); + AlterTableOption::Ttl(Ttl::Duration((duration as i64).into())) + } + 1 => AlterTableOption::Ttl(Ttl::Instant), + 2 => AlterTableOption::Ttl(Ttl::Forever), + _ => unreachable!(), + } + } + AlterTableOption::TwcsTimeWindow(_) => { + let time_window: u32 = rng.gen(); + AlterTableOption::TwcsTimeWindow((time_window as i64).into()) + } + AlterTableOption::TwcsMaxOutputFileSize(_) => { + let max_output_file_size: u64 = rng.gen(); + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize(max_output_file_size)) + } + AlterTableOption::TwcsMaxInactiveWindowRuns(_) => { + let max_inactive_window_runs: u64 = rng.gen(); + AlterTableOption::TwcsMaxInactiveWindowRuns(max_inactive_window_runs) + } + AlterTableOption::TwcsMaxActiveWindowFiles(_) => { + let max_active_window_files: u64 = rng.gen(); + AlterTableOption::TwcsMaxActiveWindowFiles(max_active_window_files) + } + AlterTableOption::TwcsMaxActiveWindowRuns(_) => { + let max_active_window_runs: u64 = rng.gen(); + AlterTableOption::TwcsMaxActiveWindowRuns(max_active_window_runs) + } + AlterTableOption::TwcsMaxInactiveWindowFiles(_) => { + let max_inactive_window_files: u64 = rng.gen(); + AlterTableOption::TwcsMaxInactiveWindowFiles(max_inactive_window_files) + } + }) + .collect(); + Ok(AlterTableExpr { + table_name: self.table_ctx.name.clone(), + alter_kinds: AlterTableOperation::SetTableOptions { options }, + }) + } +} + +/// Generates the [AlterTableOperation::UnsetTableOptions] of [AlterTableExpr]. 
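The generator below mirrors the `SetTableOptions` one above but emits only option keys. A rough usage sketch, assuming a `table_ctx: TableContextRef` and an RNG as in the tests at the end of this module:

    let expr = AlterExprUnsetTableOptionsGeneratorBuilder::default()
        .table_ctx(table_ctx.clone())
        .build()
        .unwrap()
        .generate(&mut rng)
        .unwrap();
    // `expr.alter_kinds` is AlterTableOperation::UnsetTableOptions { keys },
    // which the translators render as e.g. `ALTER TABLE t UNSET 'ttl';`.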
+#[derive(Builder)] +#[builder(pattern = "owned")] +pub struct AlterExprUnsetTableOptionsGenerator { + table_ctx: TableContextRef, + #[builder(default)] + _phantom: PhantomData, +} + +impl Generator for AlterExprUnsetTableOptionsGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + let all_options = AlterTableOption::iter().collect::>(); + // Generate random distinct options + let mut option_templates_idx = vec![]; + for _ in 1..rng.gen_range(2..=all_options.len()) { + let option = rng.gen_range(0..all_options.len()); + if !option_templates_idx.contains(&option) { + option_templates_idx.push(option); + } + } + let options = option_templates_idx + .iter() + .map(|idx| all_options[*idx].key().to_string()) + .collect(); + Ok(AlterTableExpr { + table_name: self.table_ctx.name.clone(), + alter_kinds: AlterTableOperation::UnsetTableOptions { keys: options }, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -220,7 +325,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#; assert_eq!(expected, serialized); let expr = AlterExprRenameGeneratorBuilder::default() @@ -230,7 +335,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#; assert_eq!(expected, serialized); let expr = AlterExprDropColumnGeneratorBuilder::default() @@ -240,17 +345,37 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#; assert_eq!(expected, serialized); let expr = AlterExprModifyDataTypeGeneratorBuilder::default() + .table_ctx(table_ctx.clone()) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let serialized = serde_json::to_string(&expr).unwrap(); + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"ModifyDataType":{"column":{"name":{"value":"toTAm","quote_style":null},"column_type":{"Int64":{}},"options":[]}}}}"#; + assert_eq!(expected, serialized); + + let expr = AlterExprSetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx.clone()) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let serialized = serde_json::to_string(&expr).unwrap(); + let expected = 
r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"SetTableOptions":{"options":[{"TwcsMaxActiveWindowRuns":14908016120444947142},{"TwcsMaxActiveWindowFiles":5840340123887173415},{"TwcsMaxOutputFileSize":17740311466571102265}]}}}"#; + assert_eq!(expected, serialized); + + let expr = AlterExprUnsetTableOptionsGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"ModifyDataType":{"column":{"name":{"value":"toTAm","quote_style":null},"column_type":{"Int64":{}},"options":[]}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"UnsetTableOptions":{"keys":["compaction.twcs.max_active_window_runs"]}}}"#; assert_eq!(expected, serialized); } } diff --git a/tests-fuzz/src/ir.rs b/tests-fuzz/src/ir.rs index b9d13ca9fb..ae6edd595c 100644 --- a/tests-fuzz/src/ir.rs +++ b/tests-fuzz/src/ir.rs @@ -24,7 +24,7 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::time::Duration; -pub use alter_expr::AlterTableExpr; +pub use alter_expr::{AlterTableExpr, AlterTableOption}; use common_time::timestamp::TimeUnit; use common_time::{Date, DateTime, Timestamp}; pub use create_expr::{CreateDatabaseExpr, CreateTableExpr}; diff --git a/tests-fuzz/src/ir/alter_expr.rs b/tests-fuzz/src/ir/alter_expr.rs index a9fdc18c22..1d637ff660 100644 --- a/tests-fuzz/src/ir/alter_expr.rs +++ b/tests-fuzz/src/ir/alter_expr.rs @@ -12,16 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Display; +use std::str::FromStr; + +use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; +use common_time::{Duration, FOREVER, INSTANT}; use derive_builder::Builder; use serde::{Deserialize, Serialize}; +use store_api::mito_engine_options::{ + APPEND_MODE_KEY, COMPACTION_TYPE, TTL_KEY, TWCS_MAX_ACTIVE_WINDOW_FILES, + TWCS_MAX_ACTIVE_WINDOW_RUNS, TWCS_MAX_INACTIVE_WINDOW_FILES, TWCS_MAX_INACTIVE_WINDOW_RUNS, + TWCS_MAX_OUTPUT_FILE_SIZE, TWCS_TIME_WINDOW, +}; +use strum::EnumIter; +use crate::error::{self, Result}; use crate::ir::{Column, Ident}; #[derive(Debug, Builder, Clone, Serialize, Deserialize)] pub struct AlterTableExpr { pub table_name: Ident, - pub alter_options: AlterTableOperation, + pub alter_kinds: AlterTableOperation, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -37,4 +49,196 @@ pub enum AlterTableOperation { RenameTable { new_table_name: Ident }, /// `MODIFY COLUMN ` ModifyDataType { column: Column }, + /// `SET
<table attrs key> = <table attr value>` + SetTableOptions { options: Vec<AlterTableOption> }, + /// `UNSET <table attrs key>
` + UnsetTableOptions { keys: Vec }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +pub enum Ttl { + Duration(Duration), + Instant, + #[default] + Forever, +} + +impl Display for Ttl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Ttl::Duration(d) => write!(f, "{}", d), + Ttl::Instant => write!(f, "{}", INSTANT), + Ttl::Forever => write!(f, "{}", FOREVER), + } + } +} + +#[derive(Debug, EnumIter, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum AlterTableOption { + Ttl(Ttl), + TwcsTimeWindow(Duration), + TwcsMaxOutputFileSize(ReadableSize), + TwcsMaxInactiveWindowFiles(u64), + TwcsMaxActiveWindowFiles(u64), + TwcsMaxInactiveWindowRuns(u64), + TwcsMaxActiveWindowRuns(u64), +} + +impl AlterTableOption { + pub fn key(&self) -> &str { + match self { + AlterTableOption::Ttl(_) => TTL_KEY, + AlterTableOption::TwcsTimeWindow(_) => TWCS_TIME_WINDOW, + AlterTableOption::TwcsMaxOutputFileSize(_) => TWCS_MAX_OUTPUT_FILE_SIZE, + AlterTableOption::TwcsMaxInactiveWindowFiles(_) => TWCS_MAX_INACTIVE_WINDOW_FILES, + AlterTableOption::TwcsMaxActiveWindowFiles(_) => TWCS_MAX_ACTIVE_WINDOW_FILES, + AlterTableOption::TwcsMaxInactiveWindowRuns(_) => TWCS_MAX_INACTIVE_WINDOW_RUNS, + AlterTableOption::TwcsMaxActiveWindowRuns(_) => TWCS_MAX_ACTIVE_WINDOW_RUNS, + } + } + + /// Parses the AlterTableOption from a key-value pair + fn parse_kv(key: &str, value: &str) -> Result { + match key { + TTL_KEY => { + let ttl = if value.to_lowercase() == INSTANT { + Ttl::Instant + } else if value.to_lowercase() == FOREVER { + Ttl::Forever + } else { + let duration = humantime::parse_duration(value).unwrap(); + Ttl::Duration(duration.into()) + }; + Ok(AlterTableOption::Ttl(ttl)) + } + TWCS_MAX_ACTIVE_WINDOW_RUNS => { + let runs = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxActiveWindowRuns(runs)) + } + TWCS_MAX_ACTIVE_WINDOW_FILES => { + let files = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxActiveWindowFiles(files)) + } + TWCS_MAX_INACTIVE_WINDOW_RUNS => { + let runs = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxInactiveWindowRuns(runs)) + } + TWCS_MAX_INACTIVE_WINDOW_FILES => { + let files = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxInactiveWindowFiles(files)) + } + TWCS_MAX_OUTPUT_FILE_SIZE => { + // may be "1M" instead of "1 MiB" + let value = if value.ends_with("B") { + value.to_string() + } else { + format!("{}B", value) + }; + let size = ReadableSize::from_str(&value).unwrap(); + Ok(AlterTableOption::TwcsMaxOutputFileSize(size)) + } + TWCS_TIME_WINDOW => { + let time = humantime::parse_duration(value).unwrap(); + Ok(AlterTableOption::TwcsTimeWindow(time.into())) + } + _ => error::UnexpectedSnafu { + violated: format!("Unknown table option key: {}", key), + } + .fail(), + } + } + + /// Parses the AlterTableOption from comma-separated string + pub fn parse_kv_pairs(option_string: &str) -> Result> { + let mut options = vec![]; + for pair in option_string.split(',') { + let pair = pair.trim(); + let (key, value) = pair.split_once('=').unwrap(); + let key = key.trim().replace("\'", ""); + let value = value.trim().replace('\'', ""); + // Currently we have only one compaction type, so we ignore it + // Cautious: COMPACTION_TYPE may be kept even if there are no compaction options enabled + if key == COMPACTION_TYPE || key == APPEND_MODE_KEY { + continue; + } else { + let option = AlterTableOption::parse_kv(&key, &value)?; + options.push(option); + } + } + Ok(options) + } +} + +impl Display for 
AlterTableOption { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + AlterTableOption::Ttl(d) => write!(f, "'{}' = '{}'", TTL_KEY, d), + AlterTableOption::TwcsTimeWindow(d) => write!(f, "'{}' = '{}'", TWCS_TIME_WINDOW, d), + AlterTableOption::TwcsMaxOutputFileSize(s) => { + // Caution: to_string loses precision for ReadableSize + write!(f, "'{}' = '{}'", TWCS_MAX_OUTPUT_FILE_SIZE, s) + } + AlterTableOption::TwcsMaxInactiveWindowFiles(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_INACTIVE_WINDOW_FILES, u) + } + AlterTableOption::TwcsMaxActiveWindowFiles(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_ACTIVE_WINDOW_FILES, u) + } + AlterTableOption::TwcsMaxInactiveWindowRuns(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_INACTIVE_WINDOW_RUNS, u) + } + AlterTableOption::TwcsMaxActiveWindowRuns(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_ACTIVE_WINDOW_RUNS, u) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_kv_pairs() { + let option_string = + "compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = 'forever'"; + let options = AlterTableOption::parse_kv_pairs(option_string).unwrap(); + assert_eq!(options.len(), 2); + assert_eq!( + options, + vec![ + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1MB").unwrap()), + AlterTableOption::Ttl(Ttl::Forever), + ] + ); + + let option_string = "compaction.twcs.max_active_window_files = '5030469694939972912', + compaction.twcs.max_active_window_runs = '8361168990283879099', + compaction.twcs.max_inactive_window_files = '6028716566907830876', + compaction.twcs.max_inactive_window_runs = '10622283085591494074', + compaction.twcs.max_output_file_size = '15686.4PiB', + compaction.twcs.time_window = '2061999256ms', + compaction.type = 'twcs', + ttl = '1month 3days 15h 49m 8s 279ms'"; + let options = AlterTableOption::parse_kv_pairs(option_string).unwrap(); + assert_eq!(options.len(), 7); + let expected = vec![ + AlterTableOption::TwcsMaxActiveWindowFiles(5030469694939972912), + AlterTableOption::TwcsMaxActiveWindowRuns(8361168990283879099), + AlterTableOption::TwcsMaxInactiveWindowFiles(6028716566907830876), + AlterTableOption::TwcsMaxInactiveWindowRuns(10622283085591494074), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("15686.4PiB").unwrap()), + AlterTableOption::TwcsTimeWindow(Duration::new_nanosecond(2_061_999_256_000_000)), + AlterTableOption::Ttl(Ttl::Duration(Duration::new_millisecond( + // A month is 2_630_016 seconds + 2_630_016 * 1000 + + 3 * 24 * 60 * 60 * 1000 + + 15 * 60 * 60 * 1000 + + 49 * 60 * 1000 + + 8 * 1000 + + 279, + ))), + ]; + assert_eq!(options, expected); + } } diff --git a/tests-fuzz/src/test_utils.rs b/tests-fuzz/src/test_utils.rs index e65548969a..bef96a1fd7 100644 --- a/tests-fuzz/src/test_utils.rs +++ b/tests-fuzz/src/test_utils.rs @@ -55,5 +55,6 @@ pub fn new_test_ctx() -> TableContext { ], partition: None, primary_keys: vec![], + table_options: vec![], } } diff --git a/tests-fuzz/src/translator.rs b/tests-fuzz/src/translator.rs index 1745aa9336..673b543f2c 100644 --- a/tests-fuzz/src/translator.rs +++ b/tests-fuzz/src/translator.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+mod common; pub mod mysql; pub mod postgres; diff --git a/tests-fuzz/src/translator/common.rs b/tests-fuzz/src/translator/common.rs new file mode 100644 index 0000000000..2b968ed439 --- /dev/null +++ b/tests-fuzz/src/translator/common.rs @@ -0,0 +1,67 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +use super::DslTranslator; +use crate::error::{Error, Result}; +use crate::ir::alter_expr::AlterTableOperation; +use crate::ir::{AlterTableExpr, AlterTableOption}; + +/// Shared translator for `ALTER TABLE` operations. +pub(crate) struct CommonAlterTableTranslator; + +impl DslTranslator for CommonAlterTableTranslator { + type Error = Error; + + fn translate(&self, input: &AlterTableExpr) -> Result { + Ok(match &input.alter_kinds { + AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), + AlterTableOperation::SetTableOptions { options } => { + Self::format_set_table_options(&input.table_name, options) + } + AlterTableOperation::UnsetTableOptions { keys } => { + Self::format_unset_table_options(&input.table_name, keys) + } + _ => unimplemented!(), + }) + } +} + +impl CommonAlterTableTranslator { + fn format_drop(name: impl Display, column: impl Display) -> String { + format!("ALTER TABLE {name} DROP COLUMN {column};") + } + + fn format_set_table_options(name: impl Display, options: &[AlterTableOption]) -> String { + format!( + "ALTER TABLE {name} SET {};", + options + .iter() + .map(|option| option.to_string()) + .collect::>() + .join(", ") + ) + } + + fn format_unset_table_options(name: impl Display, keys: &[String]) -> String { + format!( + "ALTER TABLE {name} UNSET {};", + keys.iter() + .map(|key| format!("'{}'", key)) + .collect::>() + .join(", ") + ) + } +} diff --git a/tests-fuzz/src/translator/mysql/alter_expr.rs b/tests-fuzz/src/translator/mysql/alter_expr.rs index c973d7cb4b..3bf30b09a3 100644 --- a/tests-fuzz/src/translator/mysql/alter_expr.rs +++ b/tests-fuzz/src/translator/mysql/alter_expr.rs @@ -22,6 +22,7 @@ use crate::error::{Error, Result}; use crate::ir::alter_expr::AlterTableOperation; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; +use crate::translator::common::CommonAlterTableTranslator; use crate::translator::DslTranslator; pub struct AlterTableExprTranslator; @@ -30,26 +31,22 @@ impl DslTranslator for AlterTableExprTranslator { type Error = Error; fn translate(&self, input: &AlterTableExpr) -> Result { - Ok(match &input.alter_options { + Ok(match &input.alter_kinds { AlterTableOperation::AddColumn { column, location } => { Self::format_add_column(&input.table_name, column, location) } - AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), AlterTableOperation::RenameTable { new_table_name } => { Self::format_rename(&input.table_name, new_table_name) } AlterTableOperation::ModifyDataType { column } => { Self::format_modify_data_type(&input.table_name, column) } + _ => 
CommonAlterTableTranslator.translate(input)?, }) } } impl AlterTableExprTranslator { - fn format_drop(name: impl Display, column: impl Display) -> String { - format!("ALTER TABLE {name} DROP COLUMN {column};") - } - fn format_rename(name: impl Display, new_name: impl Display) -> String { format!("ALTER TABLE {name} RENAME {new_name};") } @@ -119,11 +116,15 @@ impl AlterTableExprTranslator { #[cfg(test)] mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::AlterTableExprTranslator; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; use crate::translator::DslTranslator; @@ -132,7 +133,7 @@ mod tests { fn test_alter_table_expr() { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -150,7 +151,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::RenameTable { + alter_kinds: AlterTableOperation::RenameTable { new_table_name: "foo".into(), }, }; @@ -160,7 +161,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::DropColumn { name: "foo".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "foo".into() }, }; let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); @@ -168,7 +169,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::ModifyDataType { + alter_kinds: AlterTableOperation::ModifyDataType { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -180,4 +181,48 @@ mod tests { let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); assert_eq!("ALTER TABLE test MODIFY COLUMN host STRING;", output); } + + #[test] + fn test_alter_table_expr_set_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))), + AlterTableOption::TwcsTimeWindow(Duration::new_second(60)), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1GB").unwrap()), + AlterTableOption::TwcsMaxActiveWindowFiles(10), + AlterTableOption::TwcsMaxActiveWindowRuns(10), + AlterTableOption::TwcsMaxInactiveWindowFiles(5), + AlterTableOption::TwcsMaxInactiveWindowRuns(5), + ], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = concat!( + "ALTER TABLE test SET 'ttl' = '60s', ", + "'compaction.twcs.time_window' = '60s', ", + "'compaction.twcs.max_output_file_size' = '1.0GiB', ", + "'compaction.twcs.max_active_window_files' = '10', ", + "'compaction.twcs.max_active_window_runs' = '10', ", + "'compaction.twcs.max_inactive_window_files' = '5', ", + "'compaction.twcs.max_inactive_window_runs' = '5';" + ); + assert_eq!(expected, output); + } + + #[test] + fn test_alter_table_expr_unset_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec!["ttl".into(), 
"compaction.twcs.time_window".into()], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = "ALTER TABLE test UNSET 'ttl', 'compaction.twcs.time_window';"; + assert_eq!(expected, output); + } } diff --git a/tests-fuzz/src/translator/postgres/alter_expr.rs b/tests-fuzz/src/translator/postgres/alter_expr.rs index 42db202efe..f66ce0db92 100644 --- a/tests-fuzz/src/translator/postgres/alter_expr.rs +++ b/tests-fuzz/src/translator/postgres/alter_expr.rs @@ -21,6 +21,7 @@ use crate::error::{Error, Result}; use crate::ir::alter_expr::AlterTableOperation; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; +use crate::translator::common::CommonAlterTableTranslator; use crate::translator::postgres::sql_data_type_to_postgres_data_type; use crate::translator::DslTranslator; @@ -30,26 +31,22 @@ impl DslTranslator<AlterTableExpr, String> for AlterTableExprTranslator { type Error = Error; fn translate(&self, input: &AlterTableExpr) -> Result<String> { - Ok(match &input.alter_options { + Ok(match &input.alter_kinds { AlterTableOperation::AddColumn { column, .. } => { Self::format_add_column(&input.table_name, column) } - AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), AlterTableOperation::RenameTable { new_table_name } => { Self::format_rename(&input.table_name, new_table_name) } AlterTableOperation::ModifyDataType { column } => { Self::format_modify_data_type(&input.table_name, column) } + _ => CommonAlterTableTranslator.translate(input)?, }) } } impl AlterTableExprTranslator { - fn format_drop(name: impl Display, column: impl Display) -> String { - format!("ALTER TABLE {name} DROP COLUMN {column};") - } - fn format_rename(name: impl Display, new_name: impl Display) -> String { format!("ALTER TABLE {name} RENAME TO {new_name};") } @@ -116,11 +113,15 @@ impl AlterTableExprTranslator { #[cfg(test)] mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::AlterTableExprTranslator; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; use crate::translator::DslTranslator; @@ -129,7 +130,7 @@ mod tests { fn test_alter_table_expr() { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -145,7 +146,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::RenameTable { + alter_kinds: AlterTableOperation::RenameTable { new_table_name: "foo".into(), }, }; @@ -155,7 +156,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::DropColumn { name: "foo".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "foo".into() }, }; let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); @@ -163,7 +164,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::ModifyDataType { + alter_kinds: AlterTableOperation::ModifyDataType { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -176,4 +177,48 @@ mod tests { 
// Ignores the location and primary key option. assert_eq!("ALTER TABLE test MODIFY COLUMN host STRING;", output); } + + #[test] + fn test_alter_table_expr_set_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))), + AlterTableOption::TwcsTimeWindow(Duration::new_second(60)), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1GB").unwrap()), + AlterTableOption::TwcsMaxActiveWindowFiles(10), + AlterTableOption::TwcsMaxActiveWindowRuns(10), + AlterTableOption::TwcsMaxInactiveWindowFiles(5), + AlterTableOption::TwcsMaxInactiveWindowRuns(5), + ], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = concat!( + "ALTER TABLE test SET 'ttl' = '60s', ", + "'compaction.twcs.time_window' = '60s', ", + "'compaction.twcs.max_output_file_size' = '1.0GiB', ", + "'compaction.twcs.max_active_window_files' = '10', ", + "'compaction.twcs.max_active_window_runs' = '10', ", + "'compaction.twcs.max_inactive_window_files' = '5', ", + "'compaction.twcs.max_inactive_window_runs' = '5';" + ); + assert_eq!(expected, output); + } + + #[test] + fn test_alter_table_expr_unset_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec!["ttl".into(), "compaction.twcs.time_window".into()], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = "ALTER TABLE test UNSET 'ttl', 'compaction.twcs.time_window';"; + assert_eq!(expected, output); + } } diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs index 7433479789..84222f6d5a 100644 --- a/tests-fuzz/src/utils.rs +++ b/tests-fuzz/src/utils.rs @@ -142,7 +142,7 @@ macro_rules! make_get_from_env_helper { make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ALTER_ACTIONS, 256); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_INSERT_ACTIONS, 8); -make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 2048); +make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 512); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_TABLES, 64); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_COLUMNS, 32); diff --git a/tests-fuzz/src/validator.rs b/tests-fuzz/src/validator.rs index cf2df9af22..406dd66041 100644 --- a/tests-fuzz/src/validator.rs +++ b/tests-fuzz/src/validator.rs @@ -14,3 +14,4 @@ pub mod column; pub mod row; +pub mod table; diff --git a/tests-fuzz/src/validator/table.rs b/tests-fuzz/src/validator/table.rs new file mode 100644 index 0000000000..406719b2d6 --- /dev/null +++ b/tests-fuzz/src/validator/table.rs @@ -0,0 +1,103 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use snafu::{ensure, ResultExt}; +use sqlx::database::HasArguments; +use sqlx::{ColumnIndex, Database, Decode, Encode, Executor, IntoArguments, Row, Type}; + +use crate::error::{self, Result, UnexpectedSnafu}; +use crate::ir::alter_expr::AlterTableOption; + +/// Parses table options from the result of `SHOW CREATE TABLE` +/// An example of the result of `SHOW CREATE TABLE`: +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +/// | Table | Create Table | +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +/// | json | CREATE TABLE IF NOT EXISTS `json` (`ts` TIMESTAMP(3) NOT NULL, `j` JSON NULL, TIME INDEX (`ts`)) ENGINE=mito WITH(compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = '1day') | +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +fn parse_show_create(show_create: &str) -> Result<Vec<AlterTableOption>> { + if let Some(option_start) = show_create.find("WITH(") { + let option_end = { + let remain_str = &show_create[option_start..]; + if let Some(end) = remain_str.find(')') { + end + option_start + } else { + return UnexpectedSnafu { + violated: format!("Cannot find the end of the options in: {}", show_create), + } + .fail(); + } + }; + let options = &show_create[option_start + 5..option_end]; + Ok(AlterTableOption::parse_kv_pairs(options)?) + } else { + Ok(vec![]) + } +} + +/// Fetches table options from the context +pub async fn fetch_table_options<'a, DB, E>(e: E, sql: &'a str) -> Result<Vec<AlterTableOption>> +where + DB: Database, + <DB as HasArguments<'a>>::Arguments: IntoArguments<'a, DB>, + for<'c> E: 'a + Executor<'c, Database = DB>, + for<'c> String: Decode<'c, DB> + Type<DB>, + for<'c> String: Encode<'c, DB> + Type<DB>, + usize: ColumnIndex<<DB as Database>::Row>, +{ + let fetched_rows = sqlx::query(sql) + .fetch_all(e) + .await + .context(error::ExecuteQuerySnafu { sql })?; + ensure!( + fetched_rows.len() == 1, + error::AssertSnafu { + reason: format!( + "Expected fetched row length: 1, got: {}", + fetched_rows.len(), + ) + } + ); + + let row = fetched_rows.first().unwrap(); + let show_create = row.try_get::<String, usize>(1).unwrap(); + parse_show_create(&show_create) +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; + use common_time::Duration; + + use super::*; + use crate::ir::alter_expr::Ttl; + use crate::ir::AlterTableOption; + + #[test] + fn test_parse_show_create() { + let show_create = "CREATE TABLE IF NOT EXISTS `json` (`ts` TIMESTAMP(3) NOT NULL, `j` JSON NULL, TIME INDEX (`ts`)) ENGINE=mito WITH(compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = '1day')"; + let options = parse_show_create(show_create).unwrap(); + assert_eq!(options.len(), 2); + assert_eq!( + options[0], + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1MB").unwrap()) + ); + assert_eq!( + options[1], + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(24 * 60 * 60))) + ); + } +} diff --git a/tests-fuzz/targets/fuzz_alter_logical_table.rs b/tests-fuzz/targets/ddl/fuzz_alter_logical_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_alter_logical_table.rs rename to 
tests-fuzz/targets/ddl/fuzz_alter_logical_table.rs diff --git a/tests-fuzz/targets/fuzz_alter_table.rs b/tests-fuzz/targets/ddl/fuzz_alter_table.rs similarity index 72% rename from tests-fuzz/targets/fuzz_alter_table.rs rename to tests-fuzz/targets/ddl/fuzz_alter_table.rs index 7f2a809c9e..247d7632ee 100644 --- a/tests-fuzz/targets/fuzz_alter_table.rs +++ b/tests-fuzz/targets/ddl/fuzz_alter_table.rs @@ -34,10 +34,13 @@ use tests_fuzz::fake::{ use tests_fuzz::generator::alter_expr::{ AlterExprAddColumnGeneratorBuilder, AlterExprDropColumnGeneratorBuilder, AlterExprModifyDataTypeGeneratorBuilder, AlterExprRenameGeneratorBuilder, + AlterExprSetTableOptionsGeneratorBuilder, AlterExprUnsetTableOptionsGeneratorBuilder, }; use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder; use tests_fuzz::generator::Generator; -use tests_fuzz::ir::{droppable_columns, modifiable_columns, AlterTableExpr, CreateTableExpr}; +use tests_fuzz::ir::{ + droppable_columns, modifiable_columns, AlterTableExpr, AlterTableOption, CreateTableExpr, +}; use tests_fuzz::translator::mysql::alter_expr::AlterTableExprTranslator; use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; use tests_fuzz::translator::DslTranslator; @@ -62,11 +65,13 @@ struct FuzzInput { } #[derive(Debug, EnumIter)] -enum AlterTableOption { +enum AlterTableKind { AddColumn, DropColumn, RenameTable, ModifyDataType, + SetTableOptions, + UnsetTableOptions, } fn generate_create_table_expr<R: Rng + 'static>(rng: &mut R) -> Result<CreateTableExpr> { @@ -93,23 +98,23 @@ fn generate_alter_table_expr<R: Rng + 'static>( table_ctx: TableContextRef, rng: &mut R, ) -> Result<AlterTableExpr> { - let options = AlterTableOption::iter().collect::<Vec<_>>(); - match options[rng.gen_range(0..options.len())] { - AlterTableOption::DropColumn if !droppable_columns(&table_ctx.columns).is_empty() => { + let kinds = AlterTableKind::iter().collect::<Vec<_>>(); + match kinds[rng.gen_range(0..kinds.len())] { + AlterTableKind::DropColumn if !droppable_columns(&table_ctx.columns).is_empty() => { AlterExprDropColumnGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(rng) } - AlterTableOption::ModifyDataType if !modifiable_columns(&table_ctx.columns).is_empty() => { + AlterTableKind::ModifyDataType if !modifiable_columns(&table_ctx.columns).is_empty() => { AlterExprModifyDataTypeGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(rng) } - AlterTableOption::RenameTable => AlterExprRenameGeneratorBuilder::default() + AlterTableKind::RenameTable => AlterExprRenameGeneratorBuilder::default() .table_ctx(table_ctx) .name_generator(Box::new(MappedGenerator::new( WordGenerator, @@ -118,6 +123,20 @@ fn generate_alter_table_expr<R: Rng + 'static>( .build() .unwrap() .generate(rng), + AlterTableKind::SetTableOptions => { + let expr_generator = AlterExprSetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx) + .build() + .unwrap(); + expr_generator.generate(rng) + } + AlterTableKind::UnsetTableOptions => { + let expr_generator = AlterExprUnsetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx) + .build() + .unwrap(); + expr_generator.generate(rng) + } _ => { let location = rng.gen_bool(0.5); let expr_generator = AlterExprAddColumnGeneratorBuilder::default() @@ -179,6 +198,31 @@ async fn execute_alter_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> { let mut columns = table_ctx.columns.clone(); columns.sort_by(|a, b| a.name.value.cmp(&b.name.value)); validator::column::assert_eq(&column_entries, &columns)?; + + // Validates table options + let sql = format!("SHOW CREATE TABLE 
{}", table_ctx.name); + let mut table_options = validator::table::fetch_table_options(&ctx.greptime, &sql).await?; + table_options.sort_by(|a, b| a.key().cmp(b.key())); + let mut expected_table_options = table_ctx.table_options.clone(); + expected_table_options.sort_by(|a, b| a.key().cmp(b.key())); + table_options + .iter() + .zip(expected_table_options.iter()) + .for_each(|(a, b)| { + if let ( + AlterTableOption::TwcsMaxOutputFileSize(a), + AlterTableOption::TwcsMaxOutputFileSize(b), + ) = (a, b) + { + // to_string loses precision for ReadableSize, so the size in generated SQL is not the same as the size in the table context, + // but the string representation should be the same. For example: + // to_string() from_str() + // ReadableSize(13001360408898724524) ------------> "11547.5PiB" -----------> ReadableSize(13001329174265200640) + assert_eq!(a.to_string(), b.to_string()); + } else { + assert_eq!(a, b); + } + }); } // Cleans up diff --git a/tests-fuzz/targets/fuzz_create_database.rs b/tests-fuzz/targets/ddl/fuzz_create_database.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_database.rs rename to tests-fuzz/targets/ddl/fuzz_create_database.rs diff --git a/tests-fuzz/targets/fuzz_create_logical_table.rs b/tests-fuzz/targets/ddl/fuzz_create_logical_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_logical_table.rs rename to tests-fuzz/targets/ddl/fuzz_create_logical_table.rs diff --git a/tests-fuzz/targets/fuzz_create_table.rs b/tests-fuzz/targets/ddl/fuzz_create_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_table.rs rename to tests-fuzz/targets/ddl/fuzz_create_table.rs diff --git a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs index d4fa4d08fd..5bcddea53a 100644 --- a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs +++ b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs @@ -229,6 +229,29 @@ async fn create_logical_table_and_insert_values( Ok(()) } +async fn wait_for_migration(ctx: &FuzzContext, migration: &Migration, procedure_id: &str) { + info!("Waits for migration: {migration:?}"); + let region_id = migration.region_id.as_u64(); + wait_condition_fn( + Duration::from_secs(120), + || { + let greptime = ctx.greptime.clone(); + let procedure_id = procedure_id.to_string(); + Box::pin(async move { + let output = procedure_state(&greptime, &procedure_id).await; + info!("Checking procedure: {procedure_id}, output: {output}"); + (fetch_partition(&greptime, region_id).await.unwrap(), output) + }) + }, + |(partition, output)| { + info!("Region: {region_id}, datanode: {}", partition.datanode_id); + partition.datanode_id == migration.to_peer && output.contains("Done") + }, + Duration::from_secs(1), + ) + .await; +} + async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { let mut rng = ChaCha20Rng::seed_from_u64(input.seed); // Creates a physical table. 
@@ -297,28 +320,7 @@ async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { } info!("Excepted new region distribution: {new_distribution:?}"); for (migration, procedure_id) in migrations.clone().into_iter().zip(procedure_ids) { - info!("Waits for migration: {migration:?}"); - let region_id = migration.region_id.as_u64(); - wait_condition_fn( - Duration::from_secs(120), - || { - let greptime = ctx.greptime.clone(); - let procedure_id = procedure_id.to_string(); - Box::pin(async move { - { - let output = procedure_state(&greptime, &procedure_id).await; - info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() - } - }) - }, - |partition| { - info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer - }, - Duration::from_secs(1), - ) - .await; + wait_for_migration(&ctx, &migration, &procedure_id).await; } // Validates value rows @@ -388,29 +390,8 @@ async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { procedure_ids.push(procedure_id); } info!("Excepted new region distribution: {new_distribution:?}"); - for (migration, procedure_id) in migrations.into_iter().zip(procedure_ids) { - info!("Waits for migration: {migration:?}"); - let region_id = migration.region_id.as_u64(); - wait_condition_fn( - Duration::from_secs(120), - || { - let greptime = ctx.greptime.clone(); - let procedure_id = procedure_id.to_string(); - Box::pin(async move { - { - let output = procedure_state(&greptime, &procedure_id).await; - info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() - } - }) - }, - |partition| { - info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer - }, - Duration::from_secs(1), - ) - .await; + for (migration, procedure_id) in migrations.clone().into_iter().zip(procedure_ids) { + wait_for_migration(&ctx, &migration, &procedure_id).await; } // Creates more logical tables and inserts values diff --git a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs index 3f15e859c4..12c4cdae49 100644 --- a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs +++ b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs @@ -248,13 +248,13 @@ async fn migrate_regions(ctx: &FuzzContext, migrations: &[Migration]) -> Result< { let output = procedure_state(&greptime, &procedure_id).await; info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() + (fetch_partition(&greptime, region_id).await.unwrap(), output) } }) }, - |partition| { + |(partition, output)| { info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer + partition.datanode_id == migration.to_peer && output.contains("Done") }, Duration::from_secs(5), ) diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 4da65f0b21..fb28247908 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -361,6 +361,14 @@ pub async fn test_sql_api(store_type: StorageType) { let body = serde_json::from_str::(&res.text().await).unwrap(); assert_eq!(body.code(), ErrorCode::DatabaseNotFound as u32); + // test parse method + let res = client.get("/v1/sql/parse?sql=desc table t").send().await; + assert_eq!(res.status(), StatusCode::OK); + assert_eq!( + 
res.text().await, + "[{\"DescribeTable\":{\"name\":[{\"value\":\"t\",\"quote_style\":null}]}}]" + ); + // test timezone header let res = client .get("/v1/sql?&sql=show variables system_time_zone") @@ -938,6 +946,7 @@ create_on_flush = "auto" create_on_compaction = "auto" apply_on_query = "auto" mem_threshold_on_create = "auto" +content_cache_page_size = "8MiB" [region_engine.mito.fulltext_index] create_on_flush = "auto" @@ -1311,7 +1320,7 @@ pub async fn test_test_pipeline_api(store_type: StorageType) { // handshake let client = TestClient::new(app); - let body = r#" + let pipeline_content = r#" processors: - date: field: time @@ -1338,7 +1347,7 @@ transform: let res = client .post("/v1/events/pipelines/test") .header("Content-Type", "application/x-yaml") - .body(body) + .body(pipeline_content) .send() .await; @@ -1359,8 +1368,87 @@ transform: let pipeline = pipelines.first().unwrap(); assert_eq!(pipeline.get("name").unwrap(), "test"); - // 2. write data - let data_body = r#" + let dryrun_schema = json!([ + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id1" + }, + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id2" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "type" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "log" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "logger" + }, + { + "colume_type": "TIMESTAMP", + "data_type": "TIMESTAMP_NANOSECOND", + "fulltext": false, + "name": "time" + } + ]); + let dryrun_rows = json!([ + [ + { + "data_type": "INT32", + "key": "id1", + "semantic_type": "FIELD", + "value": 2436 + }, + { + "data_type": "INT32", + "key": "id2", + "semantic_type": "FIELD", + "value": 2528 + }, + { + "data_type": "STRING", + "key": "type", + "semantic_type": "FIELD", + "value": "I" + }, + { + "data_type": "STRING", + "key": "log", + "semantic_type": "FIELD", + "value": "ClusterAdapter:enter sendTextDataToCluster\\n" + }, + { + "data_type": "STRING", + "key": "logger", + "semantic_type": "FIELD", + "value": "INTERACT.MANAGER" + }, + { + "data_type": "TIMESTAMP_NANOSECOND", + "key": "time", + "semantic_type": "TIMESTAMP", + "value": "2024-05-25 20:16:37.217+0000" + } + ] + ]); + { + // test original api + let data_body = r#" [ { "id1": "2436", @@ -1372,100 +1460,100 @@ transform: } ] "#; - let res = client - .post("/v1/events/pipelines/dryrun?pipeline_name=test") - .header("Content-Type", "application/json") - .body(data_body) - .send() - .await; - assert_eq!(res.status(), StatusCode::OK); - let body: Value = res.json().await; - let schema = &body["schema"]; - let rows = &body["rows"]; - assert_eq!( - schema, - &json!([ + let res = client + .post("/v1/events/pipelines/dryrun?pipeline_name=test") + .header("Content-Type", "application/json") + .body(data_body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // test new api specify pipeline via pipeline_name + let body = r#" { - "colume_type": "FIELD", - "data_type": "INT32", - "fulltext": false, - "name": "id1" - }, - { - "colume_type": "FIELD", - "data_type": "INT32", - "fulltext": false, - "name": "id2" - }, - { - "colume_type": "FIELD", - "data_type": "STRING", - "fulltext": false, - "name": "type" - }, - { - "colume_type": 
"FIELD", - "data_type": "STRING", - "fulltext": false, - "name": "log" - }, - { - "colume_type": "FIELD", - "data_type": "STRING", - "fulltext": false, - "name": "logger" - }, - { - "colume_type": "TIMESTAMP", - "data_type": "TIMESTAMP_NANOSECOND", - "fulltext": false, - "name": "time" - } - ]) - ); - assert_eq!( - rows, - &json!([ - [ + "pipeline_name": "test", + "data": [ { - "data_type": "INT32", - "key": "id1", - "semantic_type": "FIELD", - "value": 2436 - }, - { - "data_type": "INT32", - "key": "id2", - "semantic_type": "FIELD", - "value": 2528 - }, - { - "data_type": "STRING", - "key": "type", - "semantic_type": "FIELD", - "value": "I" - }, - { - "data_type": "STRING", - "key": "log", - "semantic_type": "FIELD", - "value": "ClusterAdapter:enter sendTextDataToCluster\\n" - }, - { - "data_type": "STRING", - "key": "logger", - "semantic_type": "FIELD", - "value": "INTERACT.MANAGER" - }, - { - "data_type": "TIMESTAMP_NANOSECOND", - "key": "time", - "semantic_type": "TIMESTAMP", - "value": "2024-05-25 20:16:37.217+0000" + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" } ] - ]) - ); + } + "#; + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // test new api specify pipeline via pipeline raw data + let mut body = json!({ + "data": [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + }); + body["pipeline"] = json!(pipeline_content); + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body.to_string()) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // failback to old version api + // not pipeline and pipeline_name in the body + let body = json!({ + "data": [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + }); + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body.to_string()) + .send() + .await; + assert_eq!(res.status(), StatusCode::BAD_REQUEST); + } guard.remove_all().await; } @@ -1728,11 +1816,17 @@ pub async fn test_loki_logs(store_type: StorageType) { // init loki request let req: PushRequest = PushRequest { streams: vec![StreamAdapter { - labels: "{service=\"test\",source=\"integration\"}".to_string(), - entries: vec![EntryAdapter { - timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), - line: "this is a log message".to_string(), - }], + labels: r#"{service="test",source="integration","wadaxi"="do anything"}"#.to_string(), + entries: vec![ + EntryAdapter { + timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), + line: "this is a log message".to_string(), + }, + EntryAdapter { + timestamp: 
Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), + line: "this is a log message".to_string(), + }, + ], hash: rand::random(), }], }; @@ -1760,7 +1854,7 @@ pub async fn test_loki_logs(store_type: StorageType) { assert_eq!(StatusCode::OK, res.status()); // test schema - let expected = "[[\"loki_table_name\",\"CREATE TABLE IF NOT EXISTS \\\"loki_table_name\\\" (\\n \\\"greptime_timestamp\\\" TIMESTAMP(9) NOT NULL,\\n \\\"line\\\" STRING NULL,\\n \\\"service\\\" STRING NULL,\\n \\\"source\\\" STRING NULL,\\n TIME INDEX (\\\"greptime_timestamp\\\"),\\n PRIMARY KEY (\\\"service\\\", \\\"source\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; + let expected = "[[\"loki_table_name\",\"CREATE TABLE IF NOT EXISTS \\\"loki_table_name\\\" (\\n \\\"greptime_timestamp\\\" TIMESTAMP(9) NOT NULL,\\n \\\"line\\\" STRING NULL,\\n \\\"service\\\" STRING NULL,\\n \\\"source\\\" STRING NULL,\\n \\\"wadaxi\\\" STRING NULL,\\n TIME INDEX (\\\"greptime_timestamp\\\"),\\n PRIMARY KEY (\\\"service\\\", \\\"source\\\", \\\"wadaxi\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; validate_data( "loki_schema", &client, @@ -1770,7 +1864,7 @@ pub async fn test_loki_logs(store_type: StorageType) { .await; // test content - let expected = r#"[[1730976830000000000,"this is a log message","test","integration"]]"#; + let expected = r#"[[1730976830000000000,"this is a log message","test","integration","do anything"],[1730976830000000000,"this is a log message","test","integration","do anything"]]"#; validate_data( "loki_content", &client, diff --git a/tests/cases/standalone/common/alter/alter_table.result b/tests/cases/standalone/common/alter/alter_table.result index 120e7695d0..5c1dbfca77 100644 --- a/tests/cases/standalone/common/alter/alter_table.result +++ b/tests/cases/standalone/common/alter/alter_table.result @@ -140,10 +140,17 @@ ADD Affected Rows: 0 +ALTER TABLE + t2 +ADD + COLUMN at4 UINT16; + +Affected Rows: 0 + INSERT INTO t2 VALUES - ("loc_1", "loc_2", "loc_3", 'job1', 0, 1); + ("loc_1", "loc_2", "loc_3", 2, 'job1', 0, 1); Affected Rows: 1 @@ -152,11 +159,11 @@ SELECT FROM t2; -+-------+-------+-------+------+---------------------+-----+ -| at | at2 | at3 | job | ts | val | -+-------+-------+-------+------+---------------------+-----+ -| loc_1 | loc_2 | loc_3 | job1 | 1970-01-01T00:00:00 | 1.0 | -+-------+-------+-------+------+---------------------+-----+ ++-------+-------+-------+-----+------+---------------------+-----+ +| at | at2 | at3 | at4 | job | ts | val | ++-------+-------+-------+-----+------+---------------------+-----+ +| loc_1 | loc_2 | loc_3 | 2 | job1 | 1970-01-01T00:00:00 | 1.0 | ++-------+-------+-------+-----+------+---------------------+-----+ DROP TABLE t1; diff --git a/tests/cases/standalone/common/alter/alter_table.sql b/tests/cases/standalone/common/alter/alter_table.sql index 7f3e0b6640..c52a2445db 100644 --- a/tests/cases/standalone/common/alter/alter_table.sql +++ b/tests/cases/standalone/common/alter/alter_table.sql @@ -67,10 +67,15 @@ ALTER TABLE ADD COLUMN at2 STRING; +ALTER TABLE + t2 +ADD + COLUMN at4 UINT16; + INSERT INTO t2 VALUES - ("loc_1", "loc_2", "loc_3", 'job1', 0, 1); + ("loc_1", "loc_2", "loc_3", 2, 'job1', 0, 1); SELECT * diff --git a/tests/cases/standalone/common/create/create_with_skip_index.result b/tests/cases/standalone/common/create/create_with_skip_index.result new file mode 100644 index 0000000000..00dd24dc6c --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.result @@ -0,0 +1,33 @@ 
+create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +Affected Rows: 0 + +show +create table + skipping_table; + ++----------------+---------------------------------------------------------------------------------+ +| Table | Create Table | ++----------------+---------------------------------------------------------------------------------+ +| skipping_table | CREATE TABLE IF NOT EXISTS "skipping_table" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "id" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'), | +| | "name" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+---------------------------------------------------------------------------------+ + +drop table skipping_table; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/create/create_with_skip_index.sql b/tests/cases/standalone/common/create/create_with_skip_index.sql new file mode 100644 index 0000000000..0558936699 --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.sql @@ -0,0 +1,14 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +show +create table + skipping_table; + +drop table skipping_table; diff --git a/tests/cases/standalone/common/flow/flow_rebuild.result b/tests/cases/standalone/common/flow/flow_rebuild.result new file mode 100644 index 0000000000..67fd43a032 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_rebuild.result @@ -0,0 +1,578 @@ +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- combination of different order of rebuild input table/flow +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM 
out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | 
++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +-- test again, this time with db restart +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- combination of different order of rebuild input table/flow +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (26, "2021-07-01 00:00:02.000"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES 
+ (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/flow/flow_rebuild.sql b/tests/cases/standalone/common/flow/flow_rebuild.sql new file mode 100644 index 0000000000..288d6f1f03 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_rebuild.sql @@ -0,0 +1,319 @@ +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; + +DROP FLOW test_wildcard_basic; + +-- combination of different 
order of rebuild input table/flow + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; +DROP FLOW test_wildcard_basic; +DROP TABLE out_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE out_basic; + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +-- test again, this time with db restart +DROP TABLE input_basic; +DROP TABLE out_basic; +DROP FLOW test_wildcard_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; + +DROP FLOW 
test_wildcard_basic; + +-- combination of different order of rebuild input table/flow + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (26, "2021-07-01 00:00:02.000"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; +DROP FLOW test_wildcard_basic; +DROP TABLE out_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE out_basic; + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.result b/tests/cases/standalone/common/function/vector/vector_scalar.result new file mode 100644 index 0000000000..a379c385fa --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_scalar.result @@ -0,0 +1,96 @@ +SELECT vec_to_string(vec_scalar_add(1.0, '[1.0, 2.0]')); + ++--------------------------------------------------------------+ +| 
vec_to_string(vec_scalar_add(Float64(1),Utf8("[1.0, 2.0]"))) | ++--------------------------------------------------------------+ +| [2,3] | ++--------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1.0, '[1.0, 2.0]')); + ++---------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(-1),Utf8("[1.0, 2.0]"))) | ++---------------------------------------------------------------+ +| [0,1] | ++---------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(1.0, parse_vec('[1.0, 2.0]'))); + ++-------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(1),parse_vec(Utf8("[1.0, 2.0]")))) | ++-------------------------------------------------------------------------+ +| [2,3] | ++-------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); + ++--------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(-1),parse_vec(Utf8("[1.0, 2.0]")))) | ++--------------------------------------------------------------------------+ +| [0,1] | ++--------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); + ++------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Int64(1),Utf8("[1.0, 2.0]"))) | ++------------------------------------------------------------+ +| [2,3] | ++------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); + ++-------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Int64(-1),Utf8("[1.0, 2.0]"))) | ++-------------------------------------------------------------+ +| [0,1] | ++-------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1.0, '[1.0, 2.0]')); + ++--------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(1),Utf8("[1.0, 2.0]"))) | ++--------------------------------------------------------------+ +| [1,2] | ++--------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + ++-----------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),Utf8("[2.0, 4.0]"))) | ++-----------------------------------------------------------------+ +| [-1,-2] | ++-----------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1.0, parse_vec('[1.0, 2.0]'))); + ++-------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(1),parse_vec(Utf8("[1.0, 2.0]")))) | ++-------------------------------------------------------------------------+ +| [1,2] | ++-------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, parse_vec('[2.0, 4.0]'))); + ++----------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),parse_vec(Utf8("[2.0, 4.0]")))) | ++----------------------------------------------------------------------------+ +| [-1,-2] | ++----------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1, '[1.0, 2.0]')); + 
++------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Int64(1),Utf8("[1.0, 2.0]"))) | ++------------------------------------------------------------+ +| [1,2] | ++------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + ++-----------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),Utf8("[2.0, 4.0]"))) | ++-----------------------------------------------------------------+ +| [-1,-2] | ++-----------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.sql b/tests/cases/standalone/common/function/vector/vector_scalar.sql new file mode 100644 index 0000000000..2727f29705 --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_scalar.sql @@ -0,0 +1,23 @@ +SELECT vec_to_string(vec_scalar_add(1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(-1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + +SELECT vec_to_string(vec_scalar_mul(1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_mul(-0.5, parse_vec('[2.0, 4.0]'))); + +SELECT vec_to_string(vec_scalar_mul(1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); \ No newline at end of file diff --git a/tests/cases/standalone/common/insert/merge_mode.result b/tests/cases/standalone/common/insert/merge_mode.result index f96ad2c8bc..a98f6b6e38 100644 --- a/tests/cases/standalone/common/insert/merge_mode.result +++ b/tests/cases/standalone/common/insert/merge_mode.result @@ -92,6 +92,71 @@ DROP TABLE last_row_table; Affected Rows: 0 +CREATE TABLE IF NOT EXISTS `delete_between` ( + `time` TIMESTAMP(0) NOT NULL, + `code` STRING NULL, + `name` STRING NULL, + `status` TINYINT NULL, + TIME INDEX (`time`), + PRIMARY KEY (`code`) +) ENGINE=mito WITH( + merge_mode = 'last_non_null' +); + +Affected Rows: 0 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png', 0); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png', 0); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png', 1); + +Affected Rows: 1 + +SELECT * FROM `delete_between`; + ++---------------------+------+-------+--------+ +| time | code | name | status | ++---------------------+------+-------+--------+ +| 2024-11-26T10:00:00 | achn | 1.png | 0 | +| 2024-11-26T10:01:00 | achn | 2.png | 0 | +| 2024-11-26T10:02:00 | achn | 3.png | 1 | ++---------------------+------+-------+--------+ + +DELETE FROM `delete_between`; + +Affected Rows: 3 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png'); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png'); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png'); + +Affected Rows: 1 + +SELECT * FROM `delete_between`; + 
++---------------------+------+-------+--------+ +| time | code | name | status | ++---------------------+------+-------+--------+ +| 2024-11-26T10:00:00 | achn | 1.png | | +| 2024-11-26T10:01:00 | achn | 2.png | | +| 2024-11-26T10:02:00 | achn | 3.png | | ++---------------------+------+-------+--------+ + +DROP TABLE `delete_between`; + +Affected Rows: 0 + create table if not exists invalid_merge_mode( host string, ts timestamp, diff --git a/tests/cases/standalone/common/insert/merge_mode.sql b/tests/cases/standalone/common/insert/merge_mode.sql index 967f949333..9d22cc13d6 100644 --- a/tests/cases/standalone/common/insert/merge_mode.sql +++ b/tests/cases/standalone/common/insert/merge_mode.sql @@ -44,6 +44,33 @@ SELECT * from last_row_table ORDER BY host, ts; DROP TABLE last_row_table; +CREATE TABLE IF NOT EXISTS `delete_between` ( + `time` TIMESTAMP(0) NOT NULL, + `code` STRING NULL, + `name` STRING NULL, + `status` TINYINT NULL, + TIME INDEX (`time`), + PRIMARY KEY (`code`) +) ENGINE=mito WITH( + merge_mode = 'last_non_null' +); + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png', 0); +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png', 0); +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png', 1); + +SELECT * FROM `delete_between`; + +DELETE FROM `delete_between`; + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png'); +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png'); +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png'); + +SELECT * FROM `delete_between`; + +DROP TABLE `delete_between`; + create table if not exists invalid_merge_mode( host string, ts timestamp, diff --git a/tests/cases/standalone/common/parser/parser.result b/tests/cases/standalone/common/parser/parser.result new file mode 100644 index 0000000000..7e6dce85b7 --- /dev/null +++ b/tests/cases/standalone/common/parser/parser.result @@ -0,0 +1,50 @@ +-- columns aliases, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/columns_aliases.test +CREATE TABLE integers (ts TIMESTAMP TIME INDEX, i INT, j INT); + +Affected Rows: 0 + +INSERT INTO integers SELECT 0::TIMESTAMP ts, 42 i, 84 j UNION ALL SELECT 1::TIMESTAMP, 13, 14; + +Affected Rows: 2 + +SELECT i, j FROM (SELECT COLUMNS(*)::VARCHAR FROM integers); + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Invalid function 'columns'. +Did you mean 'COUNT'? 
+ +SELECT i, j FROM (SELECT * FROM integers); + ++----+----+ +| i | j | ++----+----+ +| 42 | 84 | +| 13 | 14 | ++----+----+ + +SELECT min_i, min_j, max_i, max_j FROM (SELECT MIN(i) AS "min_i", MAX(i) AS "max_i", MIN(j) AS "min_j", MAX(j) AS "max_j" FROM integers); + ++-------+-------+-------+-------+ +| min_i | min_j | max_i | max_j | ++-------+-------+-------+-------+ +| 13 | 14 | 42 | 84 | ++-------+-------+-------+-------+ + +DROP TABLE integers; + +Affected Rows: 0 + +-- skipped, unsupported feature: digit separators +-- SELECT 1_000_000; +-- skipped, unsupported feature: division operator precedence +-- SELECT 6 + 1 // 2; +-- expression depth, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/expression_depth_limit.test +SELECT (1+(1+(1+(1+(1+(1+(1+1))))))); + ++---------------------------------------------------------------------------------------+ +| Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) | ++---------------------------------------------------------------------------------------+ +| 8 | ++---------------------------------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/parser/parser.sql b/tests/cases/standalone/common/parser/parser.sql new file mode 100644 index 0000000000..bd7dcbf400 --- /dev/null +++ b/tests/cases/standalone/common/parser/parser.sql @@ -0,0 +1,35 @@ + +-- columns aliases, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/columns_aliases.test + +CREATE TABLE integers (ts TIMESTAMP TIME INDEX, i INT, j INT); + +INSERT INTO integers SELECT 0::TIMESTAMP ts, 42 i, 84 j UNION ALL SELECT 1::TIMESTAMP, 13, 14; + +SELECT i, j FROM (SELECT COLUMNS(*)::VARCHAR FROM integers); + +SELECT i, j FROM (SELECT * FROM integers); + +SELECT min_i, min_j, max_i, max_j FROM (SELECT MIN(i) AS "min_i", MAX(i) AS "max_i", MIN(j) AS "min_j", MAX(j) AS "max_j" FROM integers); + +DROP TABLE integers; + +-- skipped, unsupported feature: digit separators +-- SELECT 1_000_000; + +-- skipped, unsupported feature: division operator precedence +-- SELECT 6 + 1 // 2; + +-- expression depth, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/expression_depth_limit.test +SELECT (1+(1+(1+(1+(1+(1+(1+1))))))); + +-- skipped, unsupported feature: dollar quotes +-- SELECT $$$$ = ''; + +-- skipped, unsupported feature: from_first, see also: +-- https://github.com/GreptimeTeam/greptimedb/issues/5012 +-- FROM integers; + +-- skipped, unsupported feature: function chaining +-- SELECT "abcd".upper().lower(); diff --git a/tests/cases/standalone/common/promql/label.result b/tests/cases/standalone/common/promql/label.result new file mode 100644 index 0000000000..42ba33ca92 --- /dev/null +++ b/tests/cases/standalone/common/promql/label.result @@ -0,0 +1,199 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + idc STRING, + val BIGINT, + PRIMARY KEY(host, idc), +); + +Affected Rows: 0 + +INSERT INTO TABLE test VALUES + (0, 'host1', 'idc1', 1), + (0, 'host2', 'idc1', 2), + (5000, 'host1', 'idc2:zone1',3), + (5000, 'host2', 'idc2',4), + (10000, 'host1', 'idc3:zone2',5), + (10000, 'host2', 'idc3',6), + (15000, 'host1', 'idc4:zone3',7), + (15000, 'host2', 'idc4',8); + +Affected Rows: 8 + +-- Missing source labels -- +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-"); + +Error: 1004(InvalidArguments), Invalid function 
argument for label_join + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "host"); + ++---------------------+-----+-------+------------+ +| ts | val | host | idc | ++---------------------+-----+-------+------------+ +| 1970-01-01T00:00:00 | 1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | host1 | idc4:zone3 | ++---------------------+-----+-------+------------+ + +-- dst_label is in source labels -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "idc", "host"); + ++---------------------+-----+------------------+------------+ +| ts | val | host | idc | ++---------------------+-----+------------------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2-host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2-host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3-host1 | idc4:zone3 | ++---------------------+-----+------------------+------------+ + +-- test the empty source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", ""); + ++---------------------+-----+------+------------+ +| ts | val | host | idc | ++---------------------+-----+------+------------+ +| 1970-01-01T00:00:00 | 1 | | idc1 | +| 1970-01-01T00:00:05 | 1 | | idc1 | +| 1970-01-01T00:00:05 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | | idc1 | +| 1970-01-01T00:00:10 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | | idc1 | +| 1970-01-01T00:00:15 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | | idc4:zone3 | ++---------------------+-----+------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-", "idc", "host"); + ++---------------------+-----+------------------+-------+------------+ +| ts | val | new_host | host | idc | ++---------------------+-----+------------------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2-host1 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2-host1 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3-host1 | host1 | idc4:zone3 | ++---------------------+-----+------------------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL 
(0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+---------+-------+------------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+---------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | zone3 | host1 | idc4:zone3 | ++---------------------+-----+---------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*"); + ++---------------------+-----+------------+-------+------------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+------------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3 | host1 | idc4:zone3 | ++---------------------+-----+------------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "new_idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+---------+-------+------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+---------+-------+------+ +| 1970-01-01T00:00:00 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:05 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:05 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:10 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:10 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:10 | 6 | idc3 | host2 | idc3 | +| 1970-01-01T00:00:15 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:15 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:15 | 6 | idc3 | host2 | idc3 | +| 1970-01-01T00:00:15 | 8 | idc4 | host2 | idc4 | ++---------------------+-----+---------+-------+------+ + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+------+-------+ +| ts | val | idc | host | ++---------------------+-----+------+-------+ +| 1970-01-01T00:00:00 | 2 | idc1 | host2 | +| 1970-01-01T00:00:05 | 2 | idc1 | host2 | +| 1970-01-01T00:00:05 | 4 | idc2 | host2 | +| 1970-01-01T00:00:10 | 2 | idc1 | host2 | +| 1970-01-01T00:00:10 | 4 | idc2 | host2 | +| 1970-01-01T00:00:10 | 6 | idc3 | host2 | +| 1970-01-01T00:00:15 | 2 | idc1 | host2 | +| 1970-01-01T00:00:15 | 4 | idc2 | host2 | +| 1970-01-01T00:00:15 | 6 | idc3 | host2 | +| 1970-01-01T00:00:15 | 8 | idc4 | host2 | ++---------------------+-----+------+-------+ + +-- test the empty source label -- +-- TODO(dennis): we can't remove the label currently -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') 
label_replace(test{host="host2"}, "idc", "", "", ""); + ++---------------------+-----+-----+-------+ +| ts | val | idc | host | ++---------------------+-----+-----+-------+ +| 1970-01-01T00:00:00 | 2 | | host2 | +| 1970-01-01T00:00:05 | 2 | | host2 | +| 1970-01-01T00:00:05 | 4 | | host2 | +| 1970-01-01T00:00:10 | 2 | | host2 | +| 1970-01-01T00:00:10 | 4 | | host2 | +| 1970-01-01T00:00:10 | 6 | | host2 | +| 1970-01-01T00:00:15 | 2 | | host2 | +| 1970-01-01T00:00:15 | 4 | | host2 | +| 1970-01-01T00:00:15 | 6 | | host2 | +| 1970-01-01T00:00:15 | 8 | | host2 | ++---------------------+-----+-----+-------+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/label.sql b/tests/cases/standalone/common/promql/label.sql new file mode 100644 index 0000000000..3b9058c27e --- /dev/null +++ b/tests/cases/standalone/common/promql/label.sql @@ -0,0 +1,55 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + idc STRING, + val BIGINT, + PRIMARY KEY(host, idc), +); + +INSERT INTO TABLE test VALUES + (0, 'host1', 'idc1', 1), + (0, 'host2', 'idc1', 2), + (5000, 'host1', 'idc2:zone1',3), + (5000, 'host2', 'idc2',4), + (10000, 'host1', 'idc3:zone2',5), + (10000, 'host2', 'idc3',6), + (15000, 'host1', 'idc4:zone3',7), + (15000, 'host2', 'idc4',8); + +-- Missing source labels -- +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-"); + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "host"); + +-- dst_label is in source labels -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "idc", "host"); + +-- test the empty source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", ""); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-", "idc", "host"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "$2", "idc", "(.*):(.*)"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "new_idc", "$2", "idc", "(.*):(.*)"); + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(.*):(.*)"); + +-- test the empty source label -- +-- TODO(dennis): we can't remove the label currently -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "", "", ""); + +DROP TABLE test; diff --git a/tests/cases/standalone/common/select/prune.result b/tests/cases/standalone/common/select/prune.result index 13ddee5510..04282b6035 100644 --- a/tests/cases/standalone/common/select/prune.result +++ b/tests/cases/standalone/common/select/prune.result @@ -94,6 +94,32 @@ explain analyze select * from demo where idc='idc1'; |_|_| Total rows: 2_| +-+-+-+ +SELECT * FROM demo where host in ('test1'); + ++-------------------------+-------+-------+------+-----------+ +| ts | value | host | idc | collector | ++-------------------------+-------+-------+------+-----------+ +| 1970-01-01T00:00:00.001 | 2.0 | test1 | idc1 | disk | ++-------------------------+-------+-------+------+-----------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS 
REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze SELECT * FROM demo where host in ('test1'); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + drop table demo; Affected Rows: 0 diff --git a/tests/cases/standalone/common/select/prune.sql b/tests/cases/standalone/common/select/prune.sql index e7fd643537..4b976cdb1c 100644 --- a/tests/cases/standalone/common/select/prune.sql +++ b/tests/cases/standalone/common/select/prune.sql @@ -27,4 +27,14 @@ select * from demo where collector='disk' order by ts; -- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED explain analyze select * from demo where idc='idc1'; +SELECT * FROM demo where host in ('test1'); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze SELECT * FROM demo where host in ('test1'); + drop table demo; diff --git a/tests/cases/standalone/common/show/show_create.result b/tests/cases/standalone/common/show/show_create.result index ec692c0f29..85536954d4 100644 --- a/tests/cases/standalone/common/show/show_create.result +++ b/tests/cases/standalone/common/show/show_create.result @@ -46,6 +46,22 @@ SHOW CREATE TABLE system_metrics; | | ) | +----------------+-----------------------------------------------------------+ +SHOW CREATE TABLE system_metrics FOR POSTGRES_FOREIGN_TABLE; + ++----------------+------------------------------------------+ +| Table | Create Table | ++----------------+------------------------------------------+ +| system_metrics | CREATE FOREIGN TABLE ft_system_metrics ( | +| | "id" INT4, | +| | "host" VARCHAR, | +| | "cpu" FLOAT8, | +| | "disk" FLOAT4, | +| | "ts" TIMESTAMP | +| | ) | +| | SERVER greptimedb | +| | OPTIONS (table_name 'system_metrics') | ++----------------+------------------------------------------+ + DROP TABLE system_metrics; Affected Rows: 0 @@ -141,6 +157,20 @@ show create table t1; | | ) | +-------+-----------------------------------+ +SHOW CREATE TABLE t1 FOR POSTGRES_FOREIGN_TABLE; + ++-------+------------------------------+ +| Table | Create Table | ++-------+------------------------------+ +| t1 | CREATE FOREIGN TABLE ft_t1 ( | +| | "host" VARCHAR, | +| | "ts" TIMESTAMP, | +| | "val" FLOAT8 | +| | ) | +| | SERVER greptimedb | +| | OPTIONS (table_name 't1') | ++-------+------------------------------+ + drop table t1; Affected Rows: 0 diff --git a/tests/cases/standalone/common/show/show_create.sql b/tests/cases/standalone/common/show/show_create.sql index 45c8f7a3ef..5289df6e76 100644 --- a/tests/cases/standalone/common/show/show_create.sql +++ b/tests/cases/standalone/common/show/show_create.sql @@ -20,6 +20,8 @@ WITH( SHOW CREATE TABLE system_metrics; +SHOW CREATE TABLE system_metrics FOR POSTGRES_FOREIGN_TABLE; + DROP TABLE system_metrics; create table table_without_partition ( @@ -57,6 +59,8 @@ show create table phy; show create table t1; +SHOW CREATE TABLE t1 FOR POSTGRES_FOREIGN_TABLE; + drop table t1; drop table phy; diff --git a/tests/cases/standalone/common/show/show_index.result b/tests/cases/standalone/common/show/show_index.result index 995da87c13..6f179687db 100644 --- 
a/tests/cases/standalone/common/show/show_index.result +++ b/tests/cases/standalone/common/show/show_index.result @@ -1,11 +1,15 @@ CREATE TABLE IF NOT EXISTS system_metrics ( host STRING, - idc STRING, + idc STRING FULLTEXT, cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, + desc1 STRING, + desc2 STRING FULLTEXT, + desc3 STRING FULLTEXT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), + INVERTED INDEX(idc, desc1, desc2), TIME INDEX(ts) ); @@ -33,28 +37,34 @@ SHOW INDEX FROM test; +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ | test | 1 | PRIMARY | 1 | a | A | | | | YES | greptime-inverted-index-v1 | | | YES | | | test | 1 | PRIMARY | 2 | b | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| test | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | +| test | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | PRIMARY | 2 | idc | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| system_metrics | 1 | INVERTED INDEX | 6 | desc1 | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | INVERTED INDEX, FULLTEXT INDEX | 7 | desc2 | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | FULLTEXT INDEX | 8 | desc3 | A | | | | YES | greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| 
system_metrics | 1 | PRIMARY, INVERTED INDEX, FULLTEXT INDEX | 2 | idc | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics in public; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | PRIMARY | 2 | idc | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| system_metrics | 1 | INVERTED INDEX | 6 | desc1 | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | INVERTED INDEX, FULLTEXT INDEX | 7 | desc2 | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | FULLTEXT INDEX | 8 | desc3 | A | | | | YES | greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY, INVERTED INDEX, FULLTEXT INDEX | 2 | idc | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics like '%util%'; @@ -62,11 +72,11 @@ Error: 1001(Unsupported), SQL statement is not supported, keyword: like SHOW INDEX FROM system_metrics WHERE Key_name = 'TIME INDEX'; 
-+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ DROP TABLE system_metrics; diff --git a/tests/cases/standalone/common/show/show_index.sql b/tests/cases/standalone/common/show/show_index.sql index 3f804db384..f0c5894a0a 100644 --- a/tests/cases/standalone/common/show/show_index.sql +++ b/tests/cases/standalone/common/show/show_index.sql @@ -1,11 +1,15 @@ CREATE TABLE IF NOT EXISTS system_metrics ( host STRING, - idc STRING, + idc STRING FULLTEXT, cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, + desc1 STRING, + desc2 STRING FULLTEXT, + desc3 STRING FULLTEXT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), + INVERTED INDEX(idc, desc1, desc2), TIME INDEX(ts) ); diff --git a/tests/cases/standalone/common/subquery/table.result b/tests/cases/standalone/common/subquery/table.result index 8cea3aed13..549a387549 100644 --- a/tests/cases/standalone/common/subquery/table.result +++ b/tests/cases/standalone/common/subquery/table.result @@ -63,7 +63,7 @@ Affected Rows: 0 -- subquery union, from: -- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/subquery/table/test_subquery_union.test -SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43); +SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43) ORDER BY 1; +-----------+ | Int64(42) | diff --git a/tests/cases/standalone/common/subquery/table.sql b/tests/cases/standalone/common/subquery/table.sql index 9f53aef301..d49f3af739 100644 --- a/tests/cases/standalone/common/subquery/table.sql +++ b/tests/cases/standalone/common/subquery/table.sql @@ -24,7 +24,7 @@ DROP TABLE test; -- subquery union, from: -- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/subquery/table/test_subquery_union.test -SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43); +SELECT * FROM (SELECT 42) UNION ALL SELECT * 
FROM (SELECT 43) ORDER BY 1; -- table subquery, from: -- https://github.com/duckdb/duckdb/blob/8704c7d0807d6ce1e2ebcdf6398e1b6cc050e507/test/sql/subquery/table/test_table_subquery.test diff --git a/tests/cases/standalone/common/tql/basic.result b/tests/cases/standalone/common/tql/basic.result index 5c6725dbcd..3015101a55 100644 --- a/tests/cases/standalone/common/tql/basic.result +++ b/tests/cases/standalone/common/tql/basic.result @@ -66,6 +66,10 @@ TQL EVAL (0, 10, '5s') {__name__!="test"}; Error: 2000(InvalidSyntax), vector selector must contain at least one non-empty matcher +TQL EVAL (0, 10, '5s') {__name__=~"test"}; + +Error: 1004(InvalidArguments), Matcher operator =~ is not supported for __name__ + -- the point at 1ms will be shadowed by the point at 2ms TQL EVAL (0, 10, '5s') test{k="a"}; diff --git a/tests/cases/standalone/common/tql/basic.sql b/tests/cases/standalone/common/tql/basic.sql index 85f2948148..afca586ed8 100644 --- a/tests/cases/standalone/common/tql/basic.sql +++ b/tests/cases/standalone/common/tql/basic.sql @@ -22,6 +22,8 @@ TQL EVAL (0, 10, '5s') {__name__="test", __field__="i"}; -- NOT SUPPORTED: `__name__` matcher without equal condition TQL EVAL (0, 10, '5s') {__name__!="test"}; +TQL EVAL (0, 10, '5s') {__name__=~"test"}; + -- the point at 1ms will be shadowed by the point at 2ms TQL EVAL (0, 10, '5s') test{k="a"}; diff --git a/tests/conf/metasrv-test.toml.template b/tests/conf/metasrv-test.toml.template index 8d27aad3c4..1196403a26 100644 --- a/tests/conf/metasrv-test.toml.template +++ b/tests/conf/metasrv-test.toml.template @@ -1,4 +1,14 @@ flush_stats_factor = 1 +{{ if use_etcd }} +## Store server address default to etcd store. +store_addrs = [{store_addrs | unescaped}] + +## Store data in memory. +use_memory_store = false + +## The datastore for meta server. +backend = "EtcdStore" +{{ endif }} [wal] {{ if is_raft_engine }} provider = "raft_engine" diff --git a/tests/runner/Cargo.toml b/tests/runner/Cargo.toml index 71312c39de..3ea403e862 100644 --- a/tests/runner/Cargo.toml +++ b/tests/runner/Cargo.toml @@ -16,12 +16,18 @@ common-query.workspace = true common-recordbatch.workspace = true common-time.workspace = true datatypes = { workspace = true } +flate2 = "1.0" +hex = "0.4" +local-ip-address = "0.6" mysql = { version = "25.0.1", default-features = false, features = ["minimal", "rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } serde.workspace = true serde_json.workspace = true -tokio-postgres = { workspace = true } -# sqlness 0.6.0 have a bug causing `cargo sqlness` to fail(see https://github.com/CeresDB/sqlness/issues/68) which is fixed in 0.6.1 -sqlness = "0.6.1" +sha2 = "0.10" +sqlness = "0.6.1" # sqlness 0.6.0 have a bug causing `cargo sqlness` to fail(see https://github.com/CeresDB/sqlness/issues/68) which is fixed in 0.6.1 +tar = "0.4" tempfile.workspace = true tinytemplate = "1.2" tokio.workspace = true +tokio-postgres = { workspace = true } +tokio-stream.workspace = true diff --git a/tests/runner/src/env.rs b/tests/runner/src/env.rs index bb5d74a267..81bbe2fb0b 100644 --- a/tests/runner/src/env.rs +++ b/tests/runner/src/env.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use std::borrow::Cow; +use std::collections::HashMap; use std::fmt::Display; use std::fs::OpenOptions; use std::io; @@ -45,6 +46,7 @@ use tokio::sync::Mutex as TokioMutex; use tokio_postgres::{Client as PgClient, SimpleQueryMessage as PgRow}; use crate::protocol_interceptor::{MYSQL, PROTOCOL_KEY}; +use crate::util::{get_workspace_root, maybe_pull_binary, PROGRAM}; use crate::{util, ServerAddr}; const METASRV_ADDR: &str = "127.0.0.1:29302"; @@ -64,6 +66,12 @@ pub enum WalConfig { }, } +#[derive(Clone)] +pub struct StoreConfig { + pub store_addrs: Vec<String>, + pub setup_etcd: bool, +} + #[derive(Clone)] pub struct Env { sqlness_home: PathBuf, @@ -74,6 +82,12 @@ pub struct Env { /// When running in CI, this is expected to be set. /// If not set, this runner will build the GreptimeDB binary itself when needed, and set this field by then. bins_dir: Arc<Mutex<Option<PathBuf>>>, + /// The path to the directory that contains the old pre-built GreptimeDB binaries. + versioned_bins_dirs: Arc<Mutex<HashMap<String, PathBuf>>>, + /// Pull different versions of GreptimeDB on need. + pull_version_on_need: bool, + /// Store address for metasrv metadata + store_config: StoreConfig, } #[async_trait] @@ -100,13 +114,21 @@ impl Env { data_home: PathBuf, server_addrs: ServerAddr, wal: WalConfig, + pull_version_on_need: bool, bins_dir: Option<PathBuf>, + store_config: StoreConfig, ) -> Self { Self { sqlness_home: data_home, server_addrs, wal, - bins_dir: Arc::new(Mutex::new(bins_dir)), + pull_version_on_need, + bins_dir: Arc::new(Mutex::new(bins_dir.clone())), + versioned_bins_dirs: Arc::new(Mutex::new(HashMap::from_iter([( + "latest".to_string(), + bins_dir.clone().unwrap_or(util::get_binary_dir("debug")), + )]))), + store_config, } } @@ -117,7 +139,7 @@ impl Env { self.build_db(); self.setup_wal(); - let db_ctx = GreptimeDBContext::new(self.wal.clone()); + let db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone()); let server_process = self.start_server("standalone", &db_ctx, true).await; @@ -136,8 +158,9 @@ } else { self.build_db(); self.setup_wal(); + self.setup_etcd(); - let db_ctx = GreptimeDBContext::new(self.wal.clone()); + let db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone()); // start a distributed GreptimeDB let meta_server = self.start_server("metasrv", &db_ctx, true).await; @@ -152,12 +175,12 @@ let mut greptimedb = self.connect_db(&Default::default()).await; - greptimedb.metasrv_process = Some(meta_server); + greptimedb.metasrv_process = Some(meta_server).into(); greptimedb.server_processes = Some(Arc::new(Mutex::new(vec![ datanode_1, datanode_2, datanode_3, ]))); - greptimedb.frontend_process = Some(frontend); - greptimedb.flownode_process = Some(flownode); + greptimedb.frontend_process = Some(frontend).into(); + greptimedb.flownode_process = Some(flownode).into(); greptimedb.is_standalone = false; greptimedb.ctx = db_ctx; @@ -237,13 +260,14 @@ pg_client: TokioMutex::new(pg_client), mysql_client: TokioMutex::new(mysql_client), server_processes: None, - metasrv_process: None, - frontend_process: None, - flownode_process: None, + metasrv_process: None.into(), + frontend_process: None.into(), + flownode_process: None.into(), ctx: GreptimeDBContext { time: 0, datanode_id: Default::default(), wal: self.wal.clone(), + store_config: self.store_config.clone(), }, is_standalone: false, env: self.clone(), @@ -341,7 +365,7 @@ ) } "metasrv" => { - let args = vec![ + let mut args = vec![ DEFAULT_LOG_LEVEL.to_string(), subcommand.to_string(), "start".to_string(), @@ -349,8 +373,6 @@
impl Env { "127.0.0.1:29302".to_string(), "--server-addr".to_string(), "127.0.0.1:29302".to_string(), - "--backend".to_string(), - "memory-store".to_string(), "--enable-region-failover".to_string(), "false".to_string(), "--http-addr=127.0.0.1:29502".to_string(), @@ -361,6 +383,9 @@ impl Env { "-c".to_string(), self.generate_config_file(subcommand, db_ctx), ]; + if db_ctx.store_config().store_addrs.is_empty() { + args.extend(vec!["--backend".to_string(), "memory-store".to_string()]) + } (args, vec![METASRV_ADDR.to_string()]) } _ => panic!("Unexpected subcommand: {subcommand}"), @@ -375,23 +400,20 @@ impl Env { } } - #[cfg(not(windows))] - let program = "./greptime"; - #[cfg(windows)] - let program = "greptime.exe"; + let program = PROGRAM; let bins_dir = self.bins_dir.lock().unwrap().clone().expect( "GreptimeDB binary is not available. Please pass in the path to the directory that contains the pre-built GreptimeDB binary. Or you may call `self.build_db()` beforehand.", ); let mut process = Command::new(program) - .current_dir(bins_dir) + .current_dir(bins_dir.clone()) .env("TZ", "UTC") .args(args) .stdout(stdout_file) .spawn() .unwrap_or_else(|error| { - panic!("Failed to start the DB with subcommand {subcommand},Error: {error}") + panic!("Failed to start the DB with subcommand {subcommand},Error: {error}, path: {:?}", bins_dir.join(program)); }); for check_ip_addr in &check_ip_addrs { @@ -452,7 +474,7 @@ impl Env { } /// stop and restart the server process - async fn restart_server(&self, db: &GreptimeDB) { + async fn restart_server(&self, db: &GreptimeDB, is_full_restart: bool) { { if let Some(server_process) = db.server_processes.clone() { let mut server_processes = server_process.lock().unwrap(); @@ -460,6 +482,23 @@ impl Env { Env::stop_server(server_process); } } + if is_full_restart { + if let Some(mut metasrv_process) = + db.metasrv_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut metasrv_process); + } + if let Some(mut frontend_process) = + db.frontend_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut frontend_process); + } + if let Some(mut flownode_process) = + db.flownode_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut flownode_process); + } + } } // check if the server is distributed or standalone @@ -468,12 +507,37 @@ impl Env { vec![new_server_process] } else { db.ctx.reset_datanode_id(); + if is_full_restart { + let metasrv = self.start_server("metasrv", &db.ctx, false).await; + db.metasrv_process + .lock() + .expect("lock poisoned") + .replace(metasrv); + + // wait for metasrv to start + // since it seems older version of db might take longer to complete election + tokio::time::sleep(Duration::from_secs(5)).await; + } let mut processes = vec![]; for _ in 0..3 { let new_server_process = self.start_server("datanode", &db.ctx, false).await; processes.push(new_server_process); } + + if is_full_restart { + let frontend = self.start_server("frontend", &db.ctx, false).await; + db.frontend_process + .lock() + .expect("lock poisoned") + .replace(frontend); + + let flownode = self.start_server("flownode", &db.ctx, false).await; + db.flownode_process + .lock() + .expect("lock poisoned") + .replace(flownode); + } processes }; @@ -493,6 +557,19 @@ impl Env { } } + /// Setup etcd if needed. 
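+ /// Addresses in `store_config.store_addrs` are expected to be `host:port` pairs; the port component of each is handed to `util::setup_etcd` as an etcd client port.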
+ fn setup_etcd(&self) { + if self.store_config.setup_etcd { + let client_ports = self + .store_config + .store_addrs + .iter() + .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap()) + .collect::<Vec<_>>(); + util::setup_etcd(client_ports, None, None); + } + } + /// Generate config file to `/tmp/{subcommand}-{current_time}.toml` fn generate_config_file(&self, subcommand: &str, db_ctx: &GreptimeDBContext) -> String { let mut tt = TinyTemplate::new(); @@ -509,6 +586,8 @@ impl Env { procedure_dir: String, is_raft_engine: bool, kafka_wal_broker_endpoints: String, + use_etcd: bool, + store_addrs: String, } let data_home = self.sqlness_home.join(format!("greptimedb-{subcommand}")); @@ -522,6 +601,15 @@ impl Env { procedure_dir, is_raft_engine: db_ctx.is_raft_engine(), kafka_wal_broker_endpoints: db_ctx.kafka_wal_broker_endpoints(), + use_etcd: !self.store_config.store_addrs.is_empty(), + store_addrs: self + .store_config + .store_addrs + .clone() + .iter() + .map(|p| format!("\"{p}\"")) + .collect::<Vec<_>>() + .join(","), }; let rendered = tt.render(subcommand, &ctx).unwrap(); @@ -580,9 +668,9 @@ impl Env { pub struct GreptimeDB { server_processes: Option<Arc<Mutex<Vec<Child>>>>, - metasrv_process: Option<Child>, - frontend_process: Option<Child>, - flownode_process: Option<Child>, + metasrv_process: Mutex<Option<Child>>, + frontend_process: Mutex<Option<Child>>, + flownode_process: Mutex<Option<Child>>, grpc_client: TokioMutex, pg_client: TokioMutex<PgClient>, mysql_client: TokioMutex, @@ -693,8 +781,35 @@ impl GreptimeDB { impl Database for GreptimeDB { async fn query(&self, ctx: QueryContext, query: String) -> Box<dyn Display> { if ctx.context.contains_key("restart") && self.env.server_addrs.server_addr.is_none() { - self.env.restart_server(self).await; + self.env.restart_server(self, false).await; + } else if let Some(version) = ctx.context.get("version") { + let version_bin_dir = self + .env + .versioned_bins_dirs + .lock() + .expect("lock poison") + .get(version.as_str()) + .cloned(); + + match version_bin_dir { + Some(path) if path.clone().join(PROGRAM).is_file() => { + // use version in versioned_bins_dirs + *self.env.bins_dir.lock().unwrap() = Some(path.clone()); + } + _ => { + // use version in dir files + maybe_pull_binary(version, self.env.pull_version_on_need).await; + let root = get_workspace_root(); + let new_path = PathBuf::from_iter([&root, version]); + *self.env.bins_dir.lock().unwrap() = Some(new_path); + } + } + + self.env.restart_server(self, true).await; + // sleep for a while to wait for the server to fully boot up + tokio::time::sleep(Duration::from_secs(5)).await; } + if let Some(protocol) = ctx.context.get(PROTOCOL_KEY) { // protocol is bound to be either "mysql" or "postgres" if protocol == MYSQL { @@ -720,15 +835,30 @@ impl GreptimeDB { ); } } - if let Some(mut metasrv) = self.metasrv_process.take() { + if let Some(mut metasrv) = self + .metasrv_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut metasrv); println!("Metasrv (pid = {}) is stopped", metasrv.id()); } - if let Some(mut frontend) = self.frontend_process.take() { + if let Some(mut frontend) = self + .frontend_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut frontend); println!("Frontend (pid = {}) is stopped", frontend.id()); } - if let Some(mut flownode) = self.flownode_process.take() { + if let Some(mut flownode) = self + .flownode_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut flownode); println!("Flownode (pid = {}) is stopped", flownode.id()); } @@
-752,14 +882,16 @@ struct GreptimeDBContext { time: i64, datanode_id: AtomicU32, wal: WalConfig, + store_config: StoreConfig, } impl GreptimeDBContext { - pub fn new(wal: WalConfig) -> Self { + pub fn new(wal: WalConfig, store_config: StoreConfig) -> Self { Self { time: common_time::util::current_time_millis(), datanode_id: AtomicU32::new(0), wal, + store_config, } } @@ -787,6 +919,10 @@ impl GreptimeDBContext { fn reset_datanode_id(&self) { self.datanode_id.store(0, Ordering::Relaxed); } + + fn store_config(&self) -> StoreConfig { + self.store_config.clone() + } } struct ResultDisplayer { diff --git a/tests/runner/src/main.rs b/tests/runner/src/main.rs index eca72f280e..2e3158e195 100644 --- a/tests/runner/src/main.rs +++ b/tests/runner/src/main.rs @@ -22,6 +22,8 @@ use env::{Env, WalConfig}; use sqlness::interceptor::Registry; use sqlness::{ConfigBuilder, Runner}; +use crate::env::StoreConfig; + mod env; mod protocol_interceptor; mod util; @@ -92,6 +94,18 @@ struct Args { /// This may affect future test runs. #[clap(long)] preserve_state: bool, + + /// Pull Different versions of GreptimeDB on need. + #[clap(long, default_value = "true")] + pull_version_on_need: bool, + + /// The store addresses for metadata, if empty, will use memory store. + #[clap(long)] + store_addrs: Vec, + + /// Whether to setup etcd, by default it is false. + #[clap(long, default_value = "false")] + setup_etcd: bool, } #[tokio::main] @@ -110,6 +124,11 @@ async fn main() { Arc::new(protocol_interceptor::ProtocolInterceptorFactory), ); + if let Some(d) = &args.case_dir { + if !d.is_dir() { + panic!("{} is not a directory", d.display()); + } + } let config = ConfigBuilder::default() .case_dir(util::get_case_dir(args.case_dir)) .fail_fast(args.fail_fast) @@ -132,19 +151,30 @@ async fn main() { }, }; + let store = StoreConfig { + store_addrs: args.store_addrs.clone(), + setup_etcd: args.setup_etcd, + }; + let runner = Runner::new( config, Env::new( sqlness_home.clone(), args.server_addr.clone(), wal, + args.pull_version_on_need, args.bins_dir, + store, ), ); runner.run().await.unwrap(); // clean up and exit if !args.preserve_state { + if args.setup_etcd { + println!("Stopping etcd"); + util::stop_rm_etcd(); + } println!("Removing state in {:?}", sqlness_home); tokio::fs::remove_dir_all(sqlness_home).await.unwrap(); } diff --git a/tests/runner/src/util.rs b/tests/runner/src/util.rs index 04c336e148..4bcd482a26 100644 --- a/tests/runner/src/util.rs +++ b/tests/runner/src/util.rs @@ -12,18 +12,299 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::io::Read; use std::net::SocketAddr; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Duration; +use sha2::{Digest, Sha256}; use tokio::io::AsyncWriteExt; use tokio::net::TcpSocket; use tokio::time; +use tokio_stream::StreamExt; /// Check port every 0.1 second. 
const PORT_CHECK_INTERVAL: Duration = Duration::from_millis(100); +#[cfg(not(windows))] +pub const PROGRAM: &str = "./greptime"; +#[cfg(windows)] +pub const PROGRAM: &str = "greptime.exe"; + +fn http_proxy() -> Option { + for proxy in ["http_proxy", "HTTP_PROXY", "all_proxy", "ALL_PROXY"] { + if let Ok(proxy_addr) = std::env::var(proxy) { + println!("Getting Proxy from env var: {}={}", proxy, proxy_addr); + return Some(proxy_addr); + } + } + None +} + +fn https_proxy() -> Option { + for proxy in ["https_proxy", "HTTPS_PROXY", "all_proxy", "ALL_PROXY"] { + if let Ok(proxy_addr) = std::env::var(proxy) { + println!("Getting Proxy from env var: {}={}", proxy, proxy_addr); + return Some(proxy_addr); + } + } + None +} + +async fn download_files(url: &str, path: &str) { + let proxy = if url.starts_with("http://") { + http_proxy().map(|proxy| reqwest::Proxy::http(proxy).unwrap()) + } else if url.starts_with("https://") { + https_proxy().map(|proxy| reqwest::Proxy::https(proxy).unwrap()) + } else { + None + }; + + let client = proxy + .map(|proxy| { + reqwest::Client::builder() + .proxy(proxy) + .build() + .expect("Failed to build client") + }) + .unwrap_or(reqwest::Client::new()); + + let mut file = tokio::fs::File::create(path) + .await + .unwrap_or_else(|_| panic!("Failed to create file in {path}")); + println!("Downloading {}...", url); + + let resp = client + .get(url) + .send() + .await + .expect("Failed to send download request"); + let len = resp.content_length(); + let mut stream = resp.bytes_stream(); + let mut size_downloaded = 0; + + while let Some(chunk_result) = stream.next().await { + let chunk = chunk_result.unwrap(); + size_downloaded += chunk.len(); + if let Some(len) = len { + print!("\rDownloading {}/{} bytes", size_downloaded, len); + } else { + print!("\rDownloaded {} bytes", size_downloaded); + } + + file.write_all(&chunk).await.unwrap(); + } + + file.flush().await.unwrap(); + + println!("\nDownloaded {}", url); +} + +fn decompress(archive: &str, dest: &str) { + let tar = std::fs::File::open(archive).unwrap(); + let dec = flate2::read::GzDecoder::new(tar); + let mut a = tar::Archive::new(dec); + a.unpack(dest).unwrap(); +} + +/// Use curl to download the binary from the release page. +/// +/// # Arguments +/// +/// * `version` - The version of the binary to download. i.e. 
"v0.9.5" +pub async fn pull_binary(version: &str) { + let os = std::env::consts::OS; + let arch = match std::env::consts::ARCH { + "x86_64" => "amd64", + "aarch64" => "arm64", + _ => panic!("Unsupported arch: {}", std::env::consts::ARCH), + }; + let triple = format!("greptime-{}-{}-{}", os, arch, version); + let filename = format!("{triple}.tar.gz"); + + let url = format!( + "https://github.com/GreptimeTeam/greptimedb/releases/download/{version}/{filename}" + ); + println!("Downloading {version} binary from {}", url); + + // mkdir {version} + let _ = std::fs::create_dir(version); + + let archive = Path::new(version).join(filename); + let folder_path = Path::new(version); + + // download the binary to the version directory + download_files(&url, &archive.to_string_lossy()).await; + + let checksum_file = format!("{triple}.sha256sum"); + let checksum_url = format!( + "https://github.com/GreptimeTeam/greptimedb/releases/download/{version}/{checksum_file}" + ); + download_files( + &checksum_url, + &PathBuf::from_iter([version, &checksum_file]).to_string_lossy(), + ) + .await; + + // verify the checksum + let mut file = std::fs::File::open(&archive).unwrap(); + let mut sha256 = Sha256::new(); + std::io::copy(&mut file, &mut sha256).unwrap(); + let checksum: Vec = sha256.finalize().to_vec(); + + let mut expected_checksum = + std::fs::File::open(PathBuf::from_iter([version, &checksum_file])).unwrap(); + let mut buf = String::new(); + expected_checksum.read_to_string(&mut buf).unwrap(); + let expected_checksum = hex::decode(buf.lines().next().unwrap()).unwrap(); + + assert_eq!( + checksum, expected_checksum, + "Checksum mismatched, downloaded file is corrupted" + ); + + decompress(&archive.to_string_lossy(), &folder_path.to_string_lossy()); + println!("Downloaded and extracted {version} binary to {folder_path:?}"); + + // move the binary to the version directory + std::fs::rename( + PathBuf::from_iter([version, &triple, "greptime"]), + PathBuf::from_iter([version, "greptime"]), + ) + .unwrap(); + + // remove the archive and inner folder + std::fs::remove_file(&archive).unwrap(); + std::fs::remove_dir(PathBuf::from_iter([version, &triple])).unwrap(); +} + +/// Pull the binary if it does not exist and `pull_version_on_need` is true. +pub async fn maybe_pull_binary(version: &str, pull_version_on_need: bool) { + let exist = Path::new(version).join(PROGRAM).is_file(); + match (exist, pull_version_on_need){ + (true, _) => println!("Binary {version} exists"), + (false, false) => panic!("Binary {version} does not exist, please run with --pull-version-on-need or manually download it"), + (false, true) => { pull_binary(version).await; }, + } +} + +/// Set up a standalone etcd in docker. 
+pub fn setup_etcd(client_ports: Vec<u16>, peer_port: Option<u16>, etcd_version: Option<&str>) { + if std::process::Command::new("docker") + .args(["-v"]) + .status() + .is_err() + { + panic!("Docker is not installed"); + } + let peer_port = peer_port.unwrap_or(2380); + let exposed_port: Vec<_> = client_ports.iter().chain(Some(&peer_port)).collect(); + let exposed_port_str = exposed_port + .iter() + .flat_map(|p| ["-p".to_string(), format!("{p}:{p}")]) + .collect::<Vec<_>>(); + let etcd_version = etcd_version.unwrap_or("v3.5.17"); + let etcd_image = format!("quay.io/coreos/etcd:{etcd_version}"); + let peer_url = format!("http://0.0.0.0:{peer_port}"); + let my_local_ip = local_ip_address::local_ip().unwrap(); + + let my_local_ip_str = my_local_ip.to_string(); + + let mut arg_list = vec![]; + arg_list.extend([ + "run", + "-d", + "-v", + "/usr/share/ca-certificates/:/etc/ssl/certs", + ]); + arg_list.extend(exposed_port_str.iter().map(std::ops::Deref::deref)); + arg_list.extend([ + "--name", + "etcd", + &etcd_image, + "etcd", + "-name", + "etcd0", + "-advertise-client-urls", + ]); + + let adv_client_urls = client_ports + .iter() + .map(|p| format!("http://{my_local_ip_str}:{p}")) + .collect::<Vec<_>>() + .join(","); + + arg_list.push(&adv_client_urls); + + arg_list.extend(["-listen-client-urls"]); + + let client_ports_fmt = client_ports + .iter() + .map(|p| format!("http://0.0.0.0:{p}")) + .collect::<Vec<_>>() + .join(","); + + arg_list.push(&client_ports_fmt); + + arg_list.push("-initial-advertise-peer-urls"); + let advertise_peer_url = format!("http://{my_local_ip_str}:{peer_port}"); + arg_list.push(&advertise_peer_url); + + arg_list.extend(["-listen-peer-urls", &peer_url]); + + arg_list.extend(["-initial-cluster-token", "etcd-cluster-1"]); + + arg_list.push("-initial-cluster"); + + let init_cluster_url = format!("etcd0=http://{my_local_ip_str}:{peer_port}"); + + arg_list.push(&init_cluster_url); + + arg_list.extend(["-initial-cluster-state", "new"]); + + let mut cmd = std::process::Command::new("docker"); + + cmd.args(arg_list); + + println!("Starting etcd with command: {:?}", cmd); + + let status = cmd.status(); + if status.is_err() { + panic!("Failed to start etcd: {:?}", status); + } else if let Ok(status) = status { + if status.success() { + println!( + "Started etcd with client ports {:?} and peer port {}, status: {status:?}", + client_ports, peer_port + ); + } else { + panic!("Failed to start etcd: {:?}", status); + } + } +} + +/// Stop and remove the etcd container +pub fn stop_rm_etcd() { + let status = std::process::Command::new("docker") + .args(["container", "stop", "etcd"]) + .status(); + if status.is_err() { + panic!("Failed to stop etcd: {:?}", status); + } else { + println!("Stopped etcd"); + } + // rm the container + let status = std::process::Command::new("docker") + .args(["container", "rm", "etcd"]) + .status(); + if status.is_err() { + panic!("Failed to remove etcd container: {:?}", status); + } else { + println!("Removed etcd container"); + } +} + /// Get the dir of test cases. This function only works when the runner is run /// under the project's dir because it depends on some envs set by cargo.
pub fn get_case_dir(case_dir: Option) -> String { diff --git a/tests/upgrade-compat/distributed/common b/tests/upgrade-compat/distributed/common new file mode 120000 index 0000000000..2b0920287d --- /dev/null +++ b/tests/upgrade-compat/distributed/common @@ -0,0 +1 @@ +../standalone/common \ No newline at end of file diff --git a/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result new file mode 100644 index 0000000000..046255a641 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.10.2 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + 
++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + ++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql new file mode 100644 index 0000000000..1907533b15 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.10.2 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result new file mode 100644 index 0000000000..7ce230a688 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.11.0 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util 
DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + ++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + 
++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql new file mode 100644 index 0000000000..963170fdf5 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.11.0 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result new file mode 100644 index 0000000000..41b81f01c0 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + 
+Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + ++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + ++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql new file mode 100644 index 0000000000..9908085213 --- /dev/null +++ 
b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/test_simple.result b/tests/upgrade-compat/standalone/common/test_simple.result new file mode 100644 index 0000000000..ff2c340598 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_simple.result @@ -0,0 +1,47 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +); + +Affected Rows: 0 + +INSERT INTO system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+-----------------------------------------------------------+ + +DROP TABLE system_metrics; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/test_simple.sql b/tests/upgrade-compat/standalone/common/test_simple.sql new file mode 100644 index 0000000000..0f8daa0985 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_simple.sql @@ -0,0 +1,22 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE system_metrics ( + host STRING, + idc STRING, + 
cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +); + +INSERT INTO system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE system_metrics; + +DROP TABLE system_metrics; diff --git a/tests/upgrade-compat/standalone/common/test_ttl.result b/tests/upgrade-compat/standalone/common/test_ttl.result new file mode 100644 index 0000000000..d06bc629b6 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_ttl.result @@ -0,0 +1,153 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE test_ttl_0s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '0 second'); + +Affected Rows: 0 + +CREATE TABLE test_ttl_1s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '1 second'); + +Affected Rows: 0 + +CREATE TABLE test_ttl_none(ts TIMESTAMP TIME INDEX, val INT); + +Affected Rows: 0 + +CREATE DATABASE ttl_db_1s WITH (ttl = '1 second'); + +Affected Rows: 1 + +CREATE DATABASE ttl_db_0s WITH (ttl = '0 second'); + +Affected Rows: 1 + +CREATE DATABASE ttl_db_none; + +Affected Rows: 1 + +-- SQLNESS ARG version=latest +SHOW TABLES; + ++---------------+ +| Tables | ++---------------+ +| numbers | +| test_ttl_0s | +| test_ttl_1s | +| test_ttl_none | ++---------------+ + +SHOW CREATE TABLE test_ttl_1s; + ++-------------+--------------------------------------------+ +| Table | Create Table | ++-------------+--------------------------------------------+ +| test_ttl_1s | CREATE TABLE IF NOT EXISTS "test_ttl_1s" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | ttl = '1s' | +| | ) | ++-------------+--------------------------------------------+ + +SHOW CREATE TABLE test_ttl_0s; + ++-------------+--------------------------------------------+ +| Table | Create Table | ++-------------+--------------------------------------------+ +| test_ttl_0s | CREATE TABLE IF NOT EXISTS "test_ttl_0s" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | ttl = '0s' | +| | ) | ++-------------+--------------------------------------------+ + +SHOW CREATE TABLE test_ttl_none; + ++---------------+----------------------------------------------+ +| Table | Create Table | ++---------------+----------------------------------------------+ +| test_ttl_none | CREATE TABLE IF NOT EXISTS "test_ttl_none" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------+----------------------------------------------+ + +DROP TABLE test_ttl_1s; + +Affected Rows: 0 + +DROP TABLE test_ttl_0s; + +Affected Rows: 0 + +DROP TABLE test_ttl_none; + +Affected Rows: 0 + +SHOW DATABASES; + ++--------------------+ +| Database | ++--------------------+ +| greptime_private | +| information_schema | +| public | +| ttl_db_0s | +| ttl_db_1s | +| ttl_db_none | ++--------------------+ + +SHOW CREATE DATABASE ttl_db_1s; + ++-----------+-----------------------------------------+ +| Database | Create Database | ++-----------+-----------------------------------------+ +| ttl_db_1s | CREATE DATABASE IF NOT EXISTS ttl_db_1s | +| | WITH( | +| | ttl = '1s' | +| | ) | ++-----------+-----------------------------------------+ + +SHOW CREATE DATABASE ttl_db_0s; + 
++-----------+-----------------------------------------+ +| Database | Create Database | ++-----------+-----------------------------------------+ +| ttl_db_0s | CREATE DATABASE IF NOT EXISTS ttl_db_0s | +| | WITH( | +| | ttl = '0s' | +| | ) | ++-----------+-----------------------------------------+ + +SHOW CREATE DATABASE ttl_db_none; + ++-------------+-------------------------------------------+ +| Database | Create Database | ++-------------+-------------------------------------------+ +| ttl_db_none | CREATE DATABASE IF NOT EXISTS ttl_db_none | ++-------------+-------------------------------------------+ + +DROP DATABASE ttl_db_1s; + +Affected Rows: 0 + +DROP DATABASE ttl_db_0s; + +Affected Rows: 0 + +DROP DATABASE ttl_db_none; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/test_ttl.sql b/tests/upgrade-compat/standalone/common/test_ttl.sql new file mode 100644 index 0000000000..3462fd2244 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_ttl.sql @@ -0,0 +1,42 @@ + +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE test_ttl_0s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '0 second'); + +CREATE TABLE test_ttl_1s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '1 second'); + +CREATE TABLE test_ttl_none(ts TIMESTAMP TIME INDEX, val INT); + +CREATE DATABASE ttl_db_1s WITH (ttl = '1 second'); + +CREATE DATABASE ttl_db_0s WITH (ttl = '0 second'); + +CREATE DATABASE ttl_db_none; + +-- SQLNESS ARG version=latest +SHOW TABLES; + +SHOW CREATE TABLE test_ttl_1s; + +SHOW CREATE TABLE test_ttl_0s; + +SHOW CREATE TABLE test_ttl_none; + +DROP TABLE test_ttl_1s; + +DROP TABLE test_ttl_0s; + +DROP TABLE test_ttl_none; + +SHOW DATABASES; + +SHOW CREATE DATABASE ttl_db_1s; + +SHOW CREATE DATABASE ttl_db_0s; + +SHOW CREATE DATABASE ttl_db_none; + +DROP DATABASE ttl_db_1s; + +DROP DATABASE ttl_db_0s; + +DROP DATABASE ttl_db_none;