mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-27 16:32:54 +00:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01fdbf3626 | ||
|
|
97897aaf9b | ||
|
|
1fc42a681f | ||
|
|
fbc8f56eaa | ||
|
|
44280f7c9d | ||
|
|
0fbde48655 | ||
|
|
9dcfd28f61 | ||
|
|
82dbc3e1ae | ||
|
|
4d478658b5 | ||
|
|
89ebe47cd9 | ||
|
|
212ea2c25c | ||
|
|
1658d088ab | ||
|
|
346b57cf10 | ||
|
|
e1dcf83326 | ||
|
|
b5d9d635eb | ||
|
|
88dd78a69c | ||
|
|
6439b929b3 | ||
|
|
ba15c14103 |
16
.github/workflows/nightly-ci.yml
vendored
16
.github/workflows/nightly-ci.yml
vendored
@@ -34,6 +34,14 @@ jobs:
|
||||
uses: Swatinem/rust-cache@v2
|
||||
- name: Run sqlness
|
||||
run: cargo sqlness
|
||||
- name: Notify slack if failed
|
||||
if: failure()
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
|
||||
with:
|
||||
payload: |
|
||||
{"text": "Nightly CI failed for sqlness tests"}
|
||||
- name: Upload sqlness logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
@@ -80,3 +88,11 @@ jobs:
|
||||
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
|
||||
GT_S3_REGION: ${{ secrets.S3_REGION }}
|
||||
UNITTEST_LOG_DIR: "__unittest_logs"
|
||||
- name: Notify slack if failed
|
||||
if: failure()
|
||||
uses: slackapi/slack-github-action@v1.23.0
|
||||
env:
|
||||
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
|
||||
with:
|
||||
payload: |
|
||||
{"text": "Nightly CI failed for cargo test"}
|
||||
|
||||
9
.github/workflows/release.yml
vendored
9
.github/workflows/release.yml
vendored
@@ -302,8 +302,12 @@ jobs:
|
||||
release-cn-artifacts:
|
||||
name: Release artifacts to CN region
|
||||
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
|
||||
needs: [
|
||||
needs: [ # The job have to wait for all the artifacts are built.
|
||||
allocate-runners,
|
||||
build-linux-amd64-artifacts,
|
||||
build-linux-arm64-artifacts,
|
||||
build-macos-artifacts,
|
||||
build-windows-artifacts,
|
||||
release-images-to-dockerhub,
|
||||
]
|
||||
runs-on: ubuntu-20.04
|
||||
@@ -338,11 +342,12 @@ jobs:
|
||||
publish-github-release:
|
||||
name: Create GitHub release and upload artifacts
|
||||
if: ${{ inputs.publish_github_release || github.event_name == 'push' || github.event_name == 'schedule' }}
|
||||
needs: [
|
||||
needs: [ # The job have to wait for all the artifacts are built.
|
||||
allocate-runners,
|
||||
build-linux-amd64-artifacts,
|
||||
build-linux-arm64-artifacts,
|
||||
build-macos-artifacts,
|
||||
build-windows-artifacts,
|
||||
release-images-to-dockerhub,
|
||||
]
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
26
.github/workflows/size-label.yml
vendored
Normal file
26
.github/workflows/size-label.yml
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
name: size-labeler
|
||||
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
runs-on: ubuntu-latest
|
||||
name: Label the PR size
|
||||
steps:
|
||||
- uses: codelytv/pr-size-labeler@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
s_label: 'Size: S'
|
||||
s_max_size: '100'
|
||||
m_label: 'Size: M'
|
||||
m_max_size: '500'
|
||||
l_label: 'Size: L'
|
||||
l_max_size: '1000'
|
||||
xl_label: 'Size: XL'
|
||||
fail_if_xl: 'false'
|
||||
message_if_xl: >
|
||||
This PR exceeds the recommended size of 1000 lines.
|
||||
Please make sure you are NOT addressing multiple issues with one PR.
|
||||
Note this PR might be rejected due to its size.
|
||||
github_api_url: 'api.github.com'
|
||||
files_to_ignore: 'Cargo.lock'
|
||||
120
Cargo.lock
generated
120
Cargo.lock
generated
@@ -204,7 +204,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"
|
||||
|
||||
[[package]]
|
||||
name = "api"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -666,7 +666,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "auth"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -839,7 +839,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -1222,7 +1222,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "catalog"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -1506,7 +1506,7 @@ checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -1536,7 +1536,7 @@ dependencies = [
|
||||
"rand",
|
||||
"session",
|
||||
"snafu",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"substrait 0.7.5",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -1573,7 +1573,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cmd"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"async-trait",
|
||||
@@ -1621,7 +1621,7 @@ dependencies = [
|
||||
"servers",
|
||||
"session",
|
||||
"snafu",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"table",
|
||||
"temp-env",
|
||||
"tikv-jemallocator",
|
||||
@@ -1654,7 +1654,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"bitvec",
|
||||
@@ -1669,7 +1669,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-catalog"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"common-error",
|
||||
@@ -1682,7 +1682,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-config"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"humantime-serde",
|
||||
@@ -1691,7 +1691,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-datasource"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-schema",
|
||||
@@ -1720,7 +1720,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-error"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"snafu",
|
||||
"strum 0.25.0",
|
||||
@@ -1728,7 +1728,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-function"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"chrono-tz 0.6.3",
|
||||
@@ -1751,7 +1751,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-greptimedb-telemetry"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-error",
|
||||
@@ -1770,7 +1770,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -1800,7 +1800,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc-expr"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1819,7 +1819,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-macro"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"backtrace",
|
||||
@@ -1836,7 +1836,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -1849,7 +1849,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-meta"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -1887,7 +1887,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -1911,7 +1911,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure-test"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-procedure",
|
||||
@@ -1919,7 +1919,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-query"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1942,7 +1942,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-recordbatch"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -1959,7 +1959,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-runtime"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-error",
|
||||
@@ -1976,7 +1976,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-telemetry"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"common-error",
|
||||
@@ -2003,7 +2003,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-test-util"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"rand",
|
||||
@@ -2012,7 +2012,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-time"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
@@ -2027,7 +2027,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-version"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"build-data",
|
||||
]
|
||||
@@ -2665,7 +2665,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datanode"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -2724,7 +2724,7 @@ dependencies = [
|
||||
"sql",
|
||||
"storage",
|
||||
"store-api",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -2738,7 +2738,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
@@ -3201,7 +3201,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-engine"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -3311,7 +3311,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "frontend"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -3375,7 +3375,7 @@ dependencies = [
|
||||
"storage",
|
||||
"store-api",
|
||||
"strfmt",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"table",
|
||||
"tokio",
|
||||
"toml 0.7.6",
|
||||
@@ -5006,7 +5006,7 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
||||
|
||||
[[package]]
|
||||
name = "log-store"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -5276,7 +5276,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-client"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -5306,7 +5306,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-srv"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"api",
|
||||
@@ -5498,7 +5498,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito2"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"api",
|
||||
@@ -5960,11 +5960,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-runtime",
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
@@ -5973,6 +5975,7 @@ dependencies = [
|
||||
"metrics",
|
||||
"moka",
|
||||
"opendal",
|
||||
"snafu",
|
||||
"tokio",
|
||||
"uuid",
|
||||
]
|
||||
@@ -6184,7 +6187,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "operator"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-compat",
|
||||
@@ -6229,7 +6232,7 @@ dependencies = [
|
||||
"sqlparser 0.34.0",
|
||||
"storage",
|
||||
"store-api",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.9.2",
|
||||
@@ -6449,7 +6452,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -6775,7 +6778,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "plugins"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"auth",
|
||||
"common-base",
|
||||
@@ -7025,7 +7028,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
@@ -7034,6 +7037,7 @@ dependencies = [
|
||||
"common-catalog",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-recordbatch",
|
||||
"common-telemetry",
|
||||
"datafusion",
|
||||
"datatypes",
|
||||
@@ -7287,7 +7291,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "query"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"api",
|
||||
@@ -7344,7 +7348,7 @@ dependencies = [
|
||||
"stats-cli",
|
||||
"store-api",
|
||||
"streaming-stats",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -8543,7 +8547,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "script"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -8823,7 +8827,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "servers"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"aide",
|
||||
"api",
|
||||
@@ -8917,7 +8921,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -9195,7 +9199,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sql"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-base",
|
||||
@@ -9246,7 +9250,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlness-runner"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap 4.4.1",
|
||||
@@ -9452,7 +9456,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "storage"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -9506,7 +9510,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "store-api"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -9644,7 +9648,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
@@ -9802,7 +9806,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"anymap",
|
||||
"async-trait",
|
||||
@@ -9908,7 +9912,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tests-integration"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -9961,7 +9965,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlx",
|
||||
"store-api",
|
||||
"substrait 0.4.1",
|
||||
"substrait 0.4.2",
|
||||
"table",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
|
||||
@@ -55,7 +55,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -87,7 +87,7 @@ meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev =
|
||||
metrics = "0.20"
|
||||
moka = "0.12"
|
||||
once_cell = "1.18"
|
||||
opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics"] }
|
||||
opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics", "traces"] }
|
||||
parquet = "43.0"
|
||||
paste = "1.0"
|
||||
prost = "0.11"
|
||||
|
||||
@@ -19,8 +19,13 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
python3.10 \
|
||||
python3.10-dev \
|
||||
python3-pip
|
||||
python3.10-dev
|
||||
|
||||
# Remove Python 3.8 and install pip.
|
||||
RUN apt-get -y purge python3.8 && \
|
||||
apt-get -y autoremove && \
|
||||
ln -s /usr/bin/python3.10 /usr/bin/python3 && \
|
||||
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
|
||||
|
||||
RUN git config --global --add safe.directory /greptimedb
|
||||
|
||||
|
||||
@@ -39,17 +39,25 @@ pub trait ErrorExt: StackError {
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let error = self.last();
|
||||
if let Some(external_error) = error.source() {
|
||||
let external_root = external_error.sources().last().unwrap();
|
||||
|
||||
if error.to_string().is_empty() {
|
||||
format!("{external_root}")
|
||||
} else {
|
||||
format!("{error}: {external_root}")
|
||||
match self.status_code() {
|
||||
StatusCode::Unknown | StatusCode::Unexpected | StatusCode::Internal => {
|
||||
// masks internal error from end user
|
||||
format!("Internal error: {}", self.status_code() as u32)
|
||||
}
|
||||
_ => {
|
||||
let error = self.last();
|
||||
if let Some(external_error) = error.source() {
|
||||
let external_root = external_error.sources().last().unwrap();
|
||||
|
||||
if error.to_string().is_empty() {
|
||||
format!("{external_root}")
|
||||
} else {
|
||||
format!("{error}: {external_root}")
|
||||
}
|
||||
} else {
|
||||
format!("{error}")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
format!("{error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,7 +57,10 @@ impl GreptimeDBTelemetryTask {
|
||||
task_fn: BoxedTaskFunction<Error>,
|
||||
should_report: Arc<AtomicBool>,
|
||||
) -> Self {
|
||||
GreptimeDBTelemetryTask::Enable((RepeatedTask::new(interval, task_fn), should_report))
|
||||
GreptimeDBTelemetryTask::Enable((
|
||||
RepeatedTask::new(interval, task_fn).with_initial_delay(Some(Duration::ZERO)),
|
||||
should_report,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn disable() -> Self {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub mod memory;
|
||||
pub mod test;
|
||||
pub mod txn;
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
@@ -17,7 +17,6 @@ use std::collections::btree_map::Entry;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Range;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@@ -85,21 +84,25 @@ impl<T: ErrorExt + Send + Sync + 'static> KvBackend for MemoryKvBackend<T> {
|
||||
}
|
||||
|
||||
async fn range(&self, req: RangeRequest) -> Result<RangeResponse, Self::Error> {
|
||||
let range = req.range();
|
||||
let RangeRequest {
|
||||
key,
|
||||
range_end,
|
||||
limit,
|
||||
keys_only,
|
||||
limit, keys_only, ..
|
||||
} = req;
|
||||
|
||||
let kvs = self.kvs.read().unwrap();
|
||||
let values = kvs.range(range);
|
||||
|
||||
let iter: Box<dyn Iterator<Item = (&Vec<u8>, &Vec<u8>)>> = if range_end.is_empty() {
|
||||
Box::new(kvs.get_key_value(&key).into_iter())
|
||||
} else {
|
||||
Box::new(kvs.range(key..range_end))
|
||||
};
|
||||
let mut kvs = iter
|
||||
let mut more = false;
|
||||
let mut iter: i64 = 0;
|
||||
|
||||
let kvs = values
|
||||
.take_while(|_| {
|
||||
let take = limit == 0 || iter != limit;
|
||||
iter += 1;
|
||||
more = limit > 0 && iter > limit;
|
||||
|
||||
take
|
||||
})
|
||||
.map(|(k, v)| {
|
||||
let key = k.clone();
|
||||
let value = if keys_only { vec![] } else { v.clone() };
|
||||
@@ -107,13 +110,6 @@ impl<T: ErrorExt + Send + Sync + 'static> KvBackend for MemoryKvBackend<T> {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let more = if limit > 0 && kvs.len() > limit as usize {
|
||||
kvs.truncate(limit as usize);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
Ok(RangeResponse { kvs, more })
|
||||
}
|
||||
|
||||
@@ -215,36 +211,32 @@ impl<T: ErrorExt + Send + Sync + 'static> KvBackend for MemoryKvBackend<T> {
|
||||
&self,
|
||||
req: DeleteRangeRequest,
|
||||
) -> Result<DeleteRangeResponse, Self::Error> {
|
||||
let DeleteRangeRequest {
|
||||
key,
|
||||
range_end,
|
||||
prev_kv,
|
||||
} = req;
|
||||
let range = req.range();
|
||||
let DeleteRangeRequest { prev_kv, .. } = req;
|
||||
|
||||
let mut kvs = self.kvs.write().unwrap();
|
||||
|
||||
let prev_kvs = if range_end.is_empty() {
|
||||
kvs.remove(&key)
|
||||
.into_iter()
|
||||
.map(|value| KeyValue {
|
||||
key: key.clone(),
|
||||
value,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
let range = Range {
|
||||
start: key,
|
||||
end: range_end,
|
||||
};
|
||||
kvs.extract_if(|key, _| range.contains(key))
|
||||
.map(Into::into)
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let keys = kvs
|
||||
.range(range)
|
||||
.map(|(key, _)| key.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(DeleteRangeResponse {
|
||||
deleted: prev_kvs.len() as i64,
|
||||
prev_kvs: if prev_kv { prev_kvs } else { vec![] },
|
||||
})
|
||||
let mut prev_kvs = if prev_kv {
|
||||
Vec::with_capacity(keys.len())
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
let deleted = keys.len() as i64;
|
||||
|
||||
for key in keys {
|
||||
if let Some(value) = kvs.remove(&key) {
|
||||
if prev_kv {
|
||||
prev_kvs.push((key.clone(), value).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(DeleteRangeResponse { deleted, prev_kvs })
|
||||
}
|
||||
|
||||
async fn batch_delete(
|
||||
@@ -358,254 +350,63 @@ impl<T: ErrorExt + Send + Sync> TxnService for MemoryKvBackend<T> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::atomic::{AtomicU8, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::kv_backend::test::{
|
||||
prepare_kv, test_kv_batch_delete, test_kv_batch_get, test_kv_compare_and_put,
|
||||
test_kv_delete_range, test_kv_put, test_kv_range, test_kv_range_2,
|
||||
};
|
||||
use crate::kv_backend::KvBackend;
|
||||
use crate::rpc::store::{BatchGetRequest, BatchPutRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
use crate::util;
|
||||
|
||||
async fn mock_mem_store_with_data() -> MemoryKvBackend<Error> {
|
||||
let kv_store = MemoryKvBackend::<Error>::new();
|
||||
let kvs = mock_kvs();
|
||||
|
||||
assert!(kv_store
|
||||
.batch_put(BatchPutRequest {
|
||||
kvs,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.is_ok());
|
||||
|
||||
assert!(kv_store
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val11".to_vec(),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.is_ok());
|
||||
prepare_kv(&kv_store).await;
|
||||
|
||||
kv_store
|
||||
}
|
||||
|
||||
fn mock_kvs() -> Vec<KeyValue> {
|
||||
vec![
|
||||
KeyValue {
|
||||
key: b"key1".to_vec(),
|
||||
value: b"val1".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: b"key2".to_vec(),
|
||||
value: b"val2".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: b"key3".to_vec(),
|
||||
value: b"val3".to_vec(),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_put() {
|
||||
let kv_store = mock_mem_store_with_data().await;
|
||||
|
||||
let resp = kv_store
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val12".to_vec(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(resp.prev_kv.is_none());
|
||||
|
||||
let resp = kv_store
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val13".to_vec(),
|
||||
prev_kv: true,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let prev_kv = resp.prev_kv.unwrap();
|
||||
assert_eq!(b"key11", prev_kv.key());
|
||||
assert_eq!(b"val12", prev_kv.value());
|
||||
test_kv_put(kv_store).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range() {
|
||||
let kv_store = mock_mem_store_with_data().await;
|
||||
|
||||
let key = b"key1".to_vec();
|
||||
let range_end = util::get_prefix_end_key(b"key1");
|
||||
test_kv_range(kv_store).await;
|
||||
}
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key: key.clone(),
|
||||
range_end: range_end.clone(),
|
||||
limit: 0,
|
||||
keys_only: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
#[tokio::test]
|
||||
async fn test_range_2() {
|
||||
let kv = MemoryKvBackend::<Error>::new();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
assert_eq!(b"key11", resp.kvs[1].key());
|
||||
assert_eq!(b"val11", resp.kvs[1].value());
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key: key.clone(),
|
||||
range_end: range_end.clone(),
|
||||
limit: 0,
|
||||
keys_only: true,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"", resp.kvs[0].value());
|
||||
assert_eq!(b"key11", resp.kvs[1].key());
|
||||
assert_eq!(b"", resp.kvs[1].value());
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key: key.clone(),
|
||||
limit: 0,
|
||||
keys_only: false,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key,
|
||||
range_end,
|
||||
limit: 1,
|
||||
keys_only: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
test_kv_range_2(kv).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_get() {
|
||||
let kv_store = mock_mem_store_with_data().await;
|
||||
|
||||
let keys = vec![];
|
||||
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
|
||||
|
||||
assert!(resp.kvs.is_empty());
|
||||
|
||||
let keys = vec![b"key10".to_vec()];
|
||||
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
|
||||
|
||||
assert!(resp.kvs.is_empty());
|
||||
|
||||
let keys = vec![b"key1".to_vec(), b"key3".to_vec(), b"key4".to_vec()];
|
||||
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
assert_eq!(b"key3", resp.kvs[1].key());
|
||||
assert_eq!(b"val3", resp.kvs[1].value());
|
||||
test_kv_batch_get(kv_store).await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_compare_and_put() {
|
||||
let kv_store = Arc::new(MemoryKvBackend::<Error>::new());
|
||||
let success = Arc::new(AtomicU8::new(0));
|
||||
|
||||
let mut joins = vec![];
|
||||
for _ in 0..20 {
|
||||
let kv_store_clone = kv_store.clone();
|
||||
let success_clone = success.clone();
|
||||
let join = tokio::spawn(async move {
|
||||
let req = CompareAndPutRequest {
|
||||
key: b"key".to_vec(),
|
||||
expect: vec![],
|
||||
value: b"val_new".to_vec(),
|
||||
};
|
||||
let resp = kv_store_clone.compare_and_put(req).await.unwrap();
|
||||
if resp.success {
|
||||
success_clone.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
});
|
||||
joins.push(join);
|
||||
}
|
||||
|
||||
for join in joins {
|
||||
join.await.unwrap();
|
||||
}
|
||||
|
||||
assert_eq!(1, success.load(Ordering::SeqCst));
|
||||
test_kv_compare_and_put(kv_store).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_range() {
|
||||
let kv_store = mock_mem_store_with_data().await;
|
||||
|
||||
let req = DeleteRangeRequest {
|
||||
key: b"key3".to_vec(),
|
||||
range_end: vec![],
|
||||
prev_kv: true,
|
||||
};
|
||||
|
||||
let resp = kv_store.delete_range(req).await.unwrap();
|
||||
assert_eq!(1, resp.prev_kvs.len());
|
||||
assert_eq!(b"key3", resp.prev_kvs[0].key());
|
||||
assert_eq!(b"val3", resp.prev_kvs[0].value());
|
||||
|
||||
let resp = kv_store.get(b"key3").await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
|
||||
let req = DeleteRangeRequest {
|
||||
key: b"key2".to_vec(),
|
||||
range_end: vec![],
|
||||
prev_kv: false,
|
||||
};
|
||||
|
||||
let resp = kv_store.delete_range(req).await.unwrap();
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
|
||||
let resp = kv_store.get(b"key2").await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
|
||||
let key = b"key1".to_vec();
|
||||
let range_end = util::get_prefix_end_key(b"key1");
|
||||
|
||||
let req = DeleteRangeRequest {
|
||||
key: key.clone(),
|
||||
range_end: range_end.clone(),
|
||||
prev_kv: true,
|
||||
};
|
||||
let resp = kv_store.delete_range(req).await.unwrap();
|
||||
assert_eq!(2, resp.prev_kvs.len());
|
||||
|
||||
let req = RangeRequest {
|
||||
key,
|
||||
range_end,
|
||||
..Default::default()
|
||||
};
|
||||
let resp = kv_store.range(req).await.unwrap();
|
||||
assert!(resp.kvs.is_empty());
|
||||
test_kv_delete_range(kv_store).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -636,35 +437,6 @@ mod tests {
|
||||
async fn test_batch_delete() {
|
||||
let kv_store = mock_mem_store_with_data().await;
|
||||
|
||||
assert!(kv_store.get(b"key1").await.unwrap().is_some());
|
||||
assert!(kv_store.get(b"key100").await.unwrap().is_none());
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![b"key1".to_vec(), b"key100".to_vec()],
|
||||
prev_kv: true,
|
||||
};
|
||||
let resp = kv_store.batch_delete(req).await.unwrap();
|
||||
assert_eq!(1, resp.prev_kvs.len());
|
||||
assert_eq!(
|
||||
vec![KeyValue {
|
||||
key: b"key1".to_vec(),
|
||||
value: b"val1".to_vec()
|
||||
}],
|
||||
resp.prev_kvs
|
||||
);
|
||||
assert!(kv_store.get(b"key1").await.unwrap().is_none());
|
||||
|
||||
assert!(kv_store.get(b"key2").await.unwrap().is_some());
|
||||
assert!(kv_store.get(b"key3").await.unwrap().is_some());
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![b"key2".to_vec(), b"key3".to_vec()],
|
||||
prev_kv: false,
|
||||
};
|
||||
let resp = kv_store.batch_delete(req).await.unwrap();
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
|
||||
assert!(kv_store.get(b"key2").await.unwrap().is_none());
|
||||
assert!(kv_store.get(b"key3").await.unwrap().is_none());
|
||||
test_kv_batch_delete(kv_store).await;
|
||||
}
|
||||
}
|
||||
|
||||
352
src/common/meta/src/kv_backend/test.rs
Normal file
352
src/common/meta/src/kv_backend/test.rs
Normal file
@@ -0,0 +1,352 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::atomic::{AtomicU8, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{KvBackend, *};
|
||||
use crate::error::Error;
|
||||
use crate::rpc::store::{BatchGetRequest, PutRequest};
|
||||
use crate::rpc::KeyValue;
|
||||
use crate::util;
|
||||
|
||||
pub fn mock_kvs() -> Vec<KeyValue> {
|
||||
vec![
|
||||
KeyValue {
|
||||
key: b"key1".to_vec(),
|
||||
value: b"val1".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: b"key2".to_vec(),
|
||||
value: b"val2".to_vec(),
|
||||
},
|
||||
KeyValue {
|
||||
key: b"key3".to_vec(),
|
||||
value: b"val3".to_vec(),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
pub async fn prepare_kv(kv_store: &impl KvBackend) {
|
||||
let kvs = mock_kvs();
|
||||
assert!(kv_store
|
||||
.batch_put(BatchPutRequest {
|
||||
kvs,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.is_ok());
|
||||
|
||||
assert!(kv_store
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val11".to_vec(),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.is_ok());
|
||||
}
|
||||
|
||||
pub async fn test_kv_put(kv_store: impl KvBackend) {
|
||||
let resp = kv_store
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val12".to_vec(),
|
||||
prev_kv: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(resp.prev_kv.is_none());
|
||||
|
||||
let resp = kv_store
|
||||
.put(PutRequest {
|
||||
key: b"key11".to_vec(),
|
||||
value: b"val13".to_vec(),
|
||||
prev_kv: true,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let prev_kv = resp.prev_kv.unwrap();
|
||||
assert_eq!(b"key11", prev_kv.key());
|
||||
assert_eq!(b"val12", prev_kv.value());
|
||||
}
|
||||
|
||||
pub async fn test_kv_range(kv_store: impl KvBackend) {
|
||||
let key = b"key1".to_vec();
|
||||
let range_end = util::get_prefix_end_key(b"key1");
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key: key.clone(),
|
||||
range_end: range_end.clone(),
|
||||
limit: 0,
|
||||
keys_only: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
assert_eq!(b"key11", resp.kvs[1].key());
|
||||
assert_eq!(b"val11", resp.kvs[1].value());
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key: key.clone(),
|
||||
range_end: range_end.clone(),
|
||||
limit: 0,
|
||||
keys_only: true,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"", resp.kvs[0].value());
|
||||
assert_eq!(b"key11", resp.kvs[1].key());
|
||||
assert_eq!(b"", resp.kvs[1].value());
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key: key.clone(),
|
||||
limit: 0,
|
||||
keys_only: false,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
|
||||
let resp = kv_store
|
||||
.range(RangeRequest {
|
||||
key,
|
||||
range_end,
|
||||
limit: 1,
|
||||
keys_only: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
}
|
||||
|
||||
pub async fn test_kv_range_2(kv_store: impl KvBackend) {
|
||||
kv_store
|
||||
.put(PutRequest::new().with_key("atest").with_value("value"))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
kv_store
|
||||
.put(PutRequest::new().with_key("test").with_value("value"))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// If both key and range_end are ‘\0’, then range represents all keys.
|
||||
let result = kv_store
|
||||
.range(RangeRequest::new().with_range(b"\0".to_vec(), b"\0".to_vec()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.kvs.len(), 2);
|
||||
assert!(!result.more);
|
||||
|
||||
// If range_end is ‘\0’, the range is all keys greater than or equal to the key argument.
|
||||
let result = kv_store
|
||||
.range(RangeRequest::new().with_range(b"a".to_vec(), b"\0".to_vec()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.kvs.len(), 2);
|
||||
|
||||
let result = kv_store
|
||||
.range(RangeRequest::new().with_range(b"b".to_vec(), b"\0".to_vec()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result.kvs.len(), 1);
|
||||
assert_eq!(result.kvs[0].key, b"test");
|
||||
|
||||
// Fetches the keys >= "a", set limit to 1, the `more` should be true.
|
||||
let result = kv_store
|
||||
.range(
|
||||
RangeRequest::new()
|
||||
.with_range(b"a".to_vec(), b"\0".to_vec())
|
||||
.with_limit(1),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.kvs.len(), 1);
|
||||
assert!(result.more);
|
||||
|
||||
// Fetches the keys >= "a", set limit to 2, the `more` should be false.
|
||||
let result = kv_store
|
||||
.range(
|
||||
RangeRequest::new()
|
||||
.with_range(b"a".to_vec(), b"\0".to_vec())
|
||||
.with_limit(2),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.kvs.len(), 2);
|
||||
assert!(!result.more);
|
||||
|
||||
// Fetches the keys >= "a", set limit to 3, the `more` should be false.
|
||||
let result = kv_store
|
||||
.range(
|
||||
RangeRequest::new()
|
||||
.with_range(b"a".to_vec(), b"\0".to_vec())
|
||||
.with_limit(3),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.kvs.len(), 2);
|
||||
assert!(!result.more);
|
||||
}
|
||||
|
||||
pub async fn test_kv_batch_get(kv_store: impl KvBackend) {
|
||||
let keys = vec![];
|
||||
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
|
||||
|
||||
assert!(resp.kvs.is_empty());
|
||||
|
||||
let keys = vec![b"key10".to_vec()];
|
||||
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
|
||||
|
||||
assert!(resp.kvs.is_empty());
|
||||
|
||||
let keys = vec![b"key1".to_vec(), b"key3".to_vec(), b"key4".to_vec()];
|
||||
let resp = kv_store.batch_get(BatchGetRequest { keys }).await.unwrap();
|
||||
|
||||
assert_eq!(2, resp.kvs.len());
|
||||
assert_eq!(b"key1", resp.kvs[0].key());
|
||||
assert_eq!(b"val1", resp.kvs[0].value());
|
||||
assert_eq!(b"key3", resp.kvs[1].key());
|
||||
assert_eq!(b"val3", resp.kvs[1].value());
|
||||
}
|
||||
|
||||
pub async fn test_kv_compare_and_put(kv_store: Arc<dyn KvBackend<Error = Error>>) {
|
||||
let success = Arc::new(AtomicU8::new(0));
|
||||
|
||||
let mut joins = vec![];
|
||||
for _ in 0..20 {
|
||||
let kv_store_clone = kv_store.clone();
|
||||
let success_clone = success.clone();
|
||||
let join = tokio::spawn(async move {
|
||||
let req = CompareAndPutRequest {
|
||||
key: b"key".to_vec(),
|
||||
expect: vec![],
|
||||
value: b"val_new".to_vec(),
|
||||
};
|
||||
let resp = kv_store_clone.compare_and_put(req).await.unwrap();
|
||||
if resp.success {
|
||||
success_clone.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
});
|
||||
joins.push(join);
|
||||
}
|
||||
|
||||
for join in joins {
|
||||
join.await.unwrap();
|
||||
}
|
||||
|
||||
assert_eq!(1, success.load(Ordering::SeqCst));
|
||||
}
|
||||
|
||||
pub async fn test_kv_delete_range(kv_store: impl KvBackend) {
|
||||
let req = DeleteRangeRequest {
|
||||
key: b"key3".to_vec(),
|
||||
range_end: vec![],
|
||||
prev_kv: true,
|
||||
};
|
||||
|
||||
let resp = kv_store.delete_range(req).await.unwrap();
|
||||
assert_eq!(1, resp.prev_kvs.len());
|
||||
assert_eq!(1, resp.deleted);
|
||||
assert_eq!(b"key3", resp.prev_kvs[0].key());
|
||||
assert_eq!(b"val3", resp.prev_kvs[0].value());
|
||||
|
||||
let resp = kv_store.get(b"key3").await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
|
||||
let req = DeleteRangeRequest {
|
||||
key: b"key2".to_vec(),
|
||||
range_end: vec![],
|
||||
prev_kv: false,
|
||||
};
|
||||
|
||||
let resp = kv_store.delete_range(req).await.unwrap();
|
||||
assert_eq!(1, resp.deleted);
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
|
||||
let resp = kv_store.get(b"key2").await.unwrap();
|
||||
assert!(resp.is_none());
|
||||
|
||||
let key = b"key1".to_vec();
|
||||
let range_end = util::get_prefix_end_key(b"key1");
|
||||
|
||||
let req = DeleteRangeRequest {
|
||||
key: key.clone(),
|
||||
range_end: range_end.clone(),
|
||||
prev_kv: true,
|
||||
};
|
||||
let resp = kv_store.delete_range(req).await.unwrap();
|
||||
assert_eq!(2, resp.prev_kvs.len());
|
||||
|
||||
let req = RangeRequest {
|
||||
key,
|
||||
range_end,
|
||||
..Default::default()
|
||||
};
|
||||
let resp = kv_store.range(req).await.unwrap();
|
||||
assert!(resp.kvs.is_empty());
|
||||
}
|
||||
|
||||
pub async fn test_kv_batch_delete(kv_store: impl KvBackend) {
|
||||
assert!(kv_store.get(b"key1").await.unwrap().is_some());
|
||||
assert!(kv_store.get(b"key100").await.unwrap().is_none());
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![b"key1".to_vec(), b"key100".to_vec()],
|
||||
prev_kv: true,
|
||||
};
|
||||
let resp = kv_store.batch_delete(req).await.unwrap();
|
||||
assert_eq!(1, resp.prev_kvs.len());
|
||||
assert_eq!(
|
||||
vec![KeyValue {
|
||||
key: b"key1".to_vec(),
|
||||
value: b"val1".to_vec()
|
||||
}],
|
||||
resp.prev_kvs
|
||||
);
|
||||
assert!(kv_store.get(b"key1").await.unwrap().is_none());
|
||||
|
||||
assert!(kv_store.get(b"key2").await.unwrap().is_some());
|
||||
assert!(kv_store.get(b"key3").await.unwrap().is_some());
|
||||
|
||||
let req = BatchDeleteRequest {
|
||||
keys: vec![b"key2".to_vec(), b"key3".to_vec()],
|
||||
prev_kv: false,
|
||||
};
|
||||
let resp = kv_store.batch_delete(req).await.unwrap();
|
||||
assert!(resp.prev_kvs.is_empty());
|
||||
|
||||
assert!(kv_store.get(b"key2").await.unwrap().is_none());
|
||||
assert!(kv_store.get(b"key3").await.unwrap().is_none());
|
||||
}
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::ops::Bound;
|
||||
|
||||
use api::v1::meta::{
|
||||
BatchDeleteRequest as PbBatchDeleteRequest, BatchDeleteResponse as PbBatchDeleteResponse,
|
||||
@@ -30,6 +31,17 @@ use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::rpc::{util, KeyValue};
|
||||
|
||||
pub fn to_range(key: Vec<u8>, range_end: Vec<u8>) -> (Bound<Vec<u8>>, Bound<Vec<u8>>) {
|
||||
match (&key[..], &range_end[..]) {
|
||||
(_, []) => (Bound::Included(key.clone()), Bound::Included(key)),
|
||||
// If both key and range_end are ‘\0’, then range represents all keys.
|
||||
([0], [0]) => (Bound::Unbounded, Bound::Unbounded),
|
||||
// If range_end is ‘\0’, the range is all keys greater than or equal to the key argument.
|
||||
(_, [0]) => (Bound::Included(key), Bound::Unbounded),
|
||||
(_, _) => (Bound::Included(key), Bound::Excluded(range_end)),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct RangeRequest {
|
||||
/// key is the first key for the range, If range_end is not given, the
|
||||
@@ -96,6 +108,11 @@ impl RangeRequest {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the `RangeBounds`.
|
||||
pub fn range(&self) -> (Bound<Vec<u8>>, Bound<Vec<u8>>) {
|
||||
to_range(self.key.clone(), self.range_end.clone())
|
||||
}
|
||||
|
||||
/// key is the first key for the range, If range_end is not given, the
|
||||
/// request only looks up key.
|
||||
#[inline]
|
||||
@@ -690,6 +707,11 @@ impl DeleteRangeRequest {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the `RangeBounds`.
|
||||
pub fn range(&self) -> (Bound<Vec<u8>>, Bound<Vec<u8>>) {
|
||||
to_range(self.key.clone(), self.range_end.clone())
|
||||
}
|
||||
|
||||
/// key is the first key to delete in the range. If range_end is not given,
|
||||
/// the range is defined to contain only the key argument.
|
||||
#[inline]
|
||||
|
||||
@@ -1047,6 +1047,6 @@ mod tests {
|
||||
// Run the runner and execute the procedure.
|
||||
runner.run().await;
|
||||
let err = meta.state().error().unwrap().output_msg();
|
||||
assert!(err.contains("subprocedure failed"), "{err}");
|
||||
assert!(err.contains("Internal error"), "{err}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,6 +40,7 @@ pub type BoxedTaskFunction<E> = Box<dyn TaskFunction<E> + Send + Sync + 'static>
|
||||
struct TaskInner<E> {
|
||||
/// The repeated task handle. This handle is Some if the task is started.
|
||||
task_handle: Option<JoinHandle<()>>,
|
||||
|
||||
/// The task_fn to run. This is Some if the task is not started.
|
||||
task_fn: Option<BoxedTaskFunction<E>>,
|
||||
}
|
||||
@@ -50,6 +51,7 @@ pub struct RepeatedTask<E> {
|
||||
inner: Mutex<TaskInner<E>>,
|
||||
started: AtomicBool,
|
||||
interval: Duration,
|
||||
initial_delay: Option<Duration>,
|
||||
}
|
||||
|
||||
impl<E> std::fmt::Display for RepeatedTask<E> {
|
||||
@@ -75,6 +77,9 @@ impl<E> Drop for RepeatedTask<E> {
|
||||
}
|
||||
|
||||
impl<E: ErrorExt + 'static> RepeatedTask<E> {
|
||||
/// Creates a new repeated task. The `initial_delay` is the delay before the first execution.
|
||||
/// `initial_delay` default is None, the initial interval uses the `interval`.
|
||||
/// You can use `with_initial_delay` to set the `initial_delay`.
|
||||
pub fn new(interval: Duration, task_fn: BoxedTaskFunction<E>) -> Self {
|
||||
Self {
|
||||
name: task_fn.name().to_string(),
|
||||
@@ -85,9 +90,15 @@ impl<E: ErrorExt + 'static> RepeatedTask<E> {
|
||||
}),
|
||||
started: AtomicBool::new(false),
|
||||
interval,
|
||||
initial_delay: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_initial_delay(mut self, initial_delay: Option<Duration>) -> Self {
|
||||
self.initial_delay = initial_delay;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn started(&self) -> bool {
|
||||
self.started.load(Ordering::Relaxed)
|
||||
}
|
||||
@@ -99,17 +110,21 @@ impl<E: ErrorExt + 'static> RepeatedTask<E> {
|
||||
IllegalStateSnafu { name: &self.name }
|
||||
);
|
||||
|
||||
let interval = self.interval;
|
||||
let child = self.cancel_token.child_token();
|
||||
// Safety: The task is not started.
|
||||
let mut task_fn = inner.task_fn.take().unwrap();
|
||||
let interval = self.interval;
|
||||
let mut initial_delay = self.initial_delay;
|
||||
// TODO(hl): Maybe spawn to a blocking runtime.
|
||||
let handle = runtime.spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(interval) => {}
|
||||
_ = child.cancelled() => {
|
||||
return;
|
||||
let sleep_time = initial_delay.take().unwrap_or(interval);
|
||||
if sleep_time > Duration::ZERO {
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(sleep_time) => {}
|
||||
_ = child.cancelled() => {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Err(e) = task_fn.call().await {
|
||||
@@ -192,4 +207,21 @@ mod tests {
|
||||
|
||||
assert_eq!(n.load(Ordering::Relaxed), 5);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repeated_task_prior_exec() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let n = Arc::new(AtomicI32::new(0));
|
||||
let task_fn = TickTask { n: n.clone() };
|
||||
|
||||
let task = RepeatedTask::new(Duration::from_millis(100), Box::new(task_fn))
|
||||
.with_initial_delay(Some(Duration::ZERO));
|
||||
|
||||
task.start(crate::bg_runtime()).unwrap();
|
||||
tokio::time::sleep(Duration::from_millis(550)).await;
|
||||
task.stop().await.unwrap();
|
||||
|
||||
assert_eq!(n.load(Ordering::Relaxed), 6);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -654,6 +654,17 @@ impl ListValue {
|
||||
Arc::new(new_item_field(output_type.item_type().as_arrow_type())),
|
||||
))
|
||||
}
|
||||
|
||||
/// use 'the first item size' * 'length of items' to estimate the size.
|
||||
/// it could be inaccurate.
|
||||
fn estimated_size(&self) -> usize {
|
||||
if let Some(items) = &self.items {
|
||||
if let Some(item) = items.first() {
|
||||
return item.as_value_ref().data_size() * items.len();
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ListValue {
|
||||
@@ -1090,12 +1101,46 @@ impl<'a> PartialOrd for ListValueRef<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ValueRef<'a> {
|
||||
/// Returns the size of the underlying data in bytes,
|
||||
/// The size is estimated and only considers the data size.
|
||||
pub fn data_size(&self) -> usize {
|
||||
match *self {
|
||||
ValueRef::Null => 0,
|
||||
ValueRef::Boolean(_) => 1,
|
||||
ValueRef::UInt8(_) => 1,
|
||||
ValueRef::UInt16(_) => 2,
|
||||
ValueRef::UInt32(_) => 4,
|
||||
ValueRef::UInt64(_) => 8,
|
||||
ValueRef::Int8(_) => 1,
|
||||
ValueRef::Int16(_) => 2,
|
||||
ValueRef::Int32(_) => 4,
|
||||
ValueRef::Int64(_) => 8,
|
||||
ValueRef::Float32(_) => 4,
|
||||
ValueRef::Float64(_) => 8,
|
||||
ValueRef::String(v) => std::mem::size_of_val(v),
|
||||
ValueRef::Binary(v) => std::mem::size_of_val(v),
|
||||
ValueRef::Date(_) => 4,
|
||||
ValueRef::DateTime(_) => 8,
|
||||
ValueRef::Timestamp(_) => 16,
|
||||
ValueRef::Time(_) => 16,
|
||||
ValueRef::Duration(_) => 16,
|
||||
ValueRef::Interval(_) => 24,
|
||||
ValueRef::List(v) => match v {
|
||||
ListValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(),
|
||||
ListValueRef::Ref { val } => val.estimated_size(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use num_traits::Float;
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::ListVectorBuilder;
|
||||
|
||||
#[test]
|
||||
fn test_try_from_scalar_value() {
|
||||
@@ -2158,4 +2203,90 @@ mod tests {
|
||||
duration_to_scalar_value(TimeUnit::Nanosecond, Some(1))
|
||||
);
|
||||
}
|
||||
|
||||
fn check_value_ref_size_eq(value_ref: &ValueRef, size: usize) {
|
||||
assert_eq!(value_ref.data_size(), size);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_value_ref_estimated_size() {
|
||||
assert_eq!(std::mem::size_of::<ValueRef>(), 24);
|
||||
|
||||
check_value_ref_size_eq(&ValueRef::Boolean(true), 1);
|
||||
check_value_ref_size_eq(&ValueRef::UInt8(1), 1);
|
||||
check_value_ref_size_eq(&ValueRef::UInt16(1), 2);
|
||||
check_value_ref_size_eq(&ValueRef::UInt32(1), 4);
|
||||
check_value_ref_size_eq(&ValueRef::UInt64(1), 8);
|
||||
check_value_ref_size_eq(&ValueRef::Int8(1), 1);
|
||||
check_value_ref_size_eq(&ValueRef::Int16(1), 2);
|
||||
check_value_ref_size_eq(&ValueRef::Int32(1), 4);
|
||||
check_value_ref_size_eq(&ValueRef::Int64(1), 8);
|
||||
check_value_ref_size_eq(&ValueRef::Float32(1.0.into()), 4);
|
||||
check_value_ref_size_eq(&ValueRef::Float64(1.0.into()), 8);
|
||||
check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10);
|
||||
check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10);
|
||||
check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4);
|
||||
check_value_ref_size_eq(&ValueRef::DateTime(DateTime::new(1)), 8);
|
||||
check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(
|
||||
&ValueRef::Interval(Interval::from_month_day_nano(1, 2, 3)),
|
||||
24,
|
||||
);
|
||||
check_value_ref_size_eq(&ValueRef::Duration(Duration::new_millisecond(1)), 16);
|
||||
check_value_ref_size_eq(
|
||||
&ValueRef::List(ListValueRef::Ref {
|
||||
val: &ListValue {
|
||||
items: Some(Box::new(vec![
|
||||
Value::String("hello world".into()),
|
||||
Value::String("greptimedb".into()),
|
||||
])),
|
||||
datatype: ConcreteDataType::string_datatype(),
|
||||
},
|
||||
}),
|
||||
22,
|
||||
);
|
||||
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
let mut builder =
|
||||
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
|
||||
for vec_opt in &data {
|
||||
if let Some(vec) = vec_opt {
|
||||
let values = vec.iter().map(|v| Value::from(*v)).collect();
|
||||
let values = Some(Box::new(values));
|
||||
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
|
||||
|
||||
builder.push(Some(ListValueRef::Ref { val: &list_value }));
|
||||
} else {
|
||||
builder.push(None);
|
||||
}
|
||||
}
|
||||
let vector = builder.finish();
|
||||
|
||||
check_value_ref_size_eq(
|
||||
&ValueRef::List(ListValueRef::Indexed {
|
||||
vector: &vector,
|
||||
idx: 0,
|
||||
}),
|
||||
85,
|
||||
);
|
||||
check_value_ref_size_eq(
|
||||
&ValueRef::List(ListValueRef::Indexed {
|
||||
vector: &vector,
|
||||
idx: 1,
|
||||
}),
|
||||
85,
|
||||
);
|
||||
check_value_ref_size_eq(
|
||||
&ValueRef::List(ListValueRef::Indexed {
|
||||
vector: &vector,
|
||||
idx: 2,
|
||||
}),
|
||||
85,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,13 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::InsertRequests;
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use common_error::ext::BoxedError;
|
||||
use servers::error as server_error;
|
||||
use servers::error::AuthSnafu;
|
||||
use servers::opentsdb::codec::DataPoint;
|
||||
use servers::opentsdb::data_point_to_grpc_row_insert_requests;
|
||||
use servers::query_handler::OpentsdbProtocolHandler;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::prelude::*;
|
||||
@@ -27,23 +27,27 @@ use crate::instance::Instance;
|
||||
|
||||
#[async_trait]
|
||||
impl OpentsdbProtocolHandler for Instance {
|
||||
async fn exec(&self, data_point: &DataPoint, ctx: QueryContextRef) -> server_error::Result<()> {
|
||||
async fn exec(
|
||||
&self,
|
||||
data_points: Vec<DataPoint>,
|
||||
ctx: QueryContextRef,
|
||||
) -> server_error::Result<usize> {
|
||||
self.plugins
|
||||
.get::<PermissionCheckerRef>()
|
||||
.as_ref()
|
||||
.check_permission(ctx.current_user(), PermissionReq::Opentsdb)
|
||||
.context(AuthSnafu)?;
|
||||
|
||||
let requests = InsertRequests {
|
||||
inserts: vec![data_point.as_grpc_insert()],
|
||||
};
|
||||
let _ = self
|
||||
.handle_inserts(requests, ctx)
|
||||
let (requests, _) = data_point_to_grpc_row_insert_requests(data_points)?;
|
||||
let output = self
|
||||
.handle_row_inserts(requests, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| server_error::ExecuteQuerySnafu {
|
||||
query: format!("{data_point:?}"),
|
||||
})?;
|
||||
Ok(())
|
||||
.context(servers::error::ExecuteGrpcQuerySnafu)?;
|
||||
|
||||
Ok(match output {
|
||||
common_query::Output::AffectedRows(rows) => rows,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,14 +19,18 @@ use metrics::counter;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::{
|
||||
ExportMetricsServiceRequest, ExportMetricsServiceResponse,
|
||||
};
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::{
|
||||
ExportTraceServiceRequest, ExportTraceServiceResponse,
|
||||
};
|
||||
use servers::error::{self, AuthSnafu, Result as ServerResult};
|
||||
use servers::otlp;
|
||||
use servers::otlp::plugin::TraceParserRef;
|
||||
use servers::query_handler::OpenTelemetryProtocolHandler;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::instance::Instance;
|
||||
use crate::metrics::OTLP_METRICS_ROWS;
|
||||
use crate::metrics::{OTLP_METRICS_ROWS, OTLP_TRACES_ROWS};
|
||||
|
||||
#[async_trait]
|
||||
impl OpenTelemetryProtocolHandler for Instance {
|
||||
@@ -40,7 +44,7 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
.as_ref()
|
||||
.check_permission(ctx.current_user(), PermissionReq::Otlp)
|
||||
.context(AuthSnafu)?;
|
||||
let (requests, rows) = otlp::to_grpc_insert_requests(request)?;
|
||||
let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request)?;
|
||||
let _ = self
|
||||
.handle_row_inserts(requests, ctx)
|
||||
.await
|
||||
@@ -55,4 +59,40 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
};
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
async fn traces(
|
||||
&self,
|
||||
request: ExportTraceServiceRequest,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<ExportTraceServiceResponse> {
|
||||
self.plugins
|
||||
.get::<PermissionCheckerRef>()
|
||||
.as_ref()
|
||||
.check_permission(ctx.current_user(), PermissionReq::Otlp)
|
||||
.context(AuthSnafu)?;
|
||||
|
||||
let (table_name, spans) = match self.plugins.get::<TraceParserRef>() {
|
||||
Some(parser) => (parser.table_name(), parser.parse(request)),
|
||||
None => (
|
||||
otlp::trace::TRACE_TABLE_NAME.to_string(),
|
||||
otlp::trace::parse(request),
|
||||
),
|
||||
};
|
||||
|
||||
let (requests, rows) = otlp::trace::to_grpc_insert_requests(table_name, spans)?;
|
||||
|
||||
let _ = self
|
||||
.handle_row_inserts(requests, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteGrpcQuerySnafu)?;
|
||||
|
||||
counter!(OTLP_TRACES_ROWS, rows as u64);
|
||||
|
||||
let resp = ExportTraceServiceResponse {
|
||||
// TODO(fys): add support for partial_success in future patch
|
||||
partial_success: None,
|
||||
};
|
||||
Ok(resp)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,3 +22,4 @@ pub(crate) const METRIC_RUN_SCRIPT_ELAPSED: &str = "frontend.run_script_elapsed"
|
||||
pub const PROM_STORE_REMOTE_WRITE_SAMPLES: &str = "frontend.prometheus.remote_write.samples";
|
||||
|
||||
pub const OTLP_METRICS_ROWS: &str = "frontend.otlp.metrics.rows";
|
||||
pub const OTLP_TRACES_ROWS: &str = "frontend.otlp.traces.rows";
|
||||
|
||||
@@ -19,7 +19,7 @@ common-base = { workspace = true }
|
||||
common-config = { workspace = true }
|
||||
common-error = { workspace = true }
|
||||
common-macro = { workspace = true }
|
||||
common-meta = { workspace = true }
|
||||
common-meta = { workspace = true, features = ["testing"] }
|
||||
common-runtime = { workspace = true }
|
||||
common-telemetry = { workspace = true }
|
||||
futures-util.workspace = true
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
//! [KvBackend] implementation based on [raft_engine::Engine].
|
||||
|
||||
use std::any::Any;
|
||||
use std::ops::Bound::{Excluded, Included, Unbounded};
|
||||
use std::sync::RwLock;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
@@ -28,6 +29,7 @@ use common_meta::rpc::store::{
|
||||
RangeRequest, RangeResponse,
|
||||
};
|
||||
use common_meta::rpc::KeyValue;
|
||||
use common_meta::util::get_next_prefix_key;
|
||||
use raft_engine::{Config, Engine, LogBatch};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -137,29 +139,48 @@ impl KvBackend for RaftEngineBackend {
|
||||
|
||||
async fn range(&self, req: RangeRequest) -> Result<RangeResponse, Self::Error> {
|
||||
let mut res = vec![];
|
||||
let (start, end) = req.range();
|
||||
let RangeRequest {
|
||||
keys_only, limit, ..
|
||||
} = req;
|
||||
|
||||
let (start_key, end_key) = match (start, end) {
|
||||
(Included(start), Included(end)) => (Some(start), Some(get_next_prefix_key(&end))),
|
||||
(Unbounded, Unbounded) => (None, None),
|
||||
(Included(start), Excluded(end)) => (Some(start), Some(end)),
|
||||
(Included(start), Unbounded) => (Some(start), None),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let mut more = false;
|
||||
let mut iter = 0;
|
||||
|
||||
self.engine
|
||||
.read()
|
||||
.unwrap()
|
||||
.scan_raw_messages(
|
||||
SYSTEM_NAMESPACE,
|
||||
Some(&req.key),
|
||||
Some(&req.range_end),
|
||||
start_key.as_deref(),
|
||||
end_key.as_deref(),
|
||||
false,
|
||||
|key, value| {
|
||||
res.push(KeyValue {
|
||||
key: key.to_vec(),
|
||||
value: value.to_vec(),
|
||||
});
|
||||
true
|
||||
let take = limit == 0 || iter != limit;
|
||||
iter += 1;
|
||||
more = limit > 0 && iter > limit;
|
||||
|
||||
if take {
|
||||
res.push(KeyValue {
|
||||
key: key.to_vec(),
|
||||
value: if keys_only { vec![] } else { value.to_vec() },
|
||||
});
|
||||
}
|
||||
|
||||
take
|
||||
},
|
||||
)
|
||||
.context(RaftEngineSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(meta_error::ExternalSnafu)?;
|
||||
Ok(RangeResponse {
|
||||
kvs: res,
|
||||
more: false,
|
||||
})
|
||||
Ok(RangeResponse { kvs: res, more })
|
||||
}
|
||||
|
||||
async fn put(&self, req: PutRequest) -> Result<PutResponse, Self::Error> {
|
||||
@@ -275,7 +296,7 @@ impl KvBackend for RaftEngineBackend {
|
||||
key,
|
||||
range_end,
|
||||
limit: 0,
|
||||
keys_only: true,
|
||||
keys_only: false,
|
||||
};
|
||||
let range_resp = self.range(range).await?;
|
||||
|
||||
@@ -383,7 +404,12 @@ fn engine_delete(engine: &Engine, key: &[u8]) -> meta_error::Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_meta::kv_backend::test::{
|
||||
prepare_kv, test_kv_batch_delete, test_kv_batch_get, test_kv_compare_and_put,
|
||||
test_kv_delete_range, test_kv_put, test_kv_range, test_kv_range_2,
|
||||
};
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use raft_engine::{Config, ReadableSize, RecoveryMode};
|
||||
|
||||
@@ -615,4 +641,66 @@ mod tests {
|
||||
keys
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range() {
|
||||
let dir = create_temp_dir("range");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
prepare_kv(&backend).await;
|
||||
|
||||
test_kv_range(backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_range_2() {
|
||||
let dir = create_temp_dir("range2");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
|
||||
test_kv_range_2(backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_put() {
|
||||
let dir = create_temp_dir("put");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
prepare_kv(&backend).await;
|
||||
|
||||
test_kv_put(backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_get() {
|
||||
let dir = create_temp_dir("batch_get");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
prepare_kv(&backend).await;
|
||||
|
||||
test_kv_batch_get(backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_batch_delete() {
|
||||
let dir = create_temp_dir("batch_delete");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
prepare_kv(&backend).await;
|
||||
|
||||
test_kv_batch_delete(backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_range() {
|
||||
let dir = create_temp_dir("delete_range");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
prepare_kv(&backend).await;
|
||||
|
||||
test_kv_delete_range(backend).await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_compare_and_put_2() {
|
||||
let dir = create_temp_dir("compare_and_put");
|
||||
let backend = build_kv_backend(dir.path().to_str().unwrap().to_string());
|
||||
prepare_kv(&backend).await;
|
||||
|
||||
test_kv_compare_and_put(Arc::new(backend)).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::RegionRequest;
|
||||
use store_api::region_request::{RegionCloseRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::config::MitoConfig;
|
||||
@@ -55,6 +55,37 @@ async fn test_engine_create_existing_region() {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_engine_create_close_create_region() {
|
||||
// This test will trigger create_or_open function.
|
||||
let mut env = TestEnv::with_prefix("create-close-create");
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let builder = CreateRequestBuilder::new();
|
||||
// Create a region with id 1.
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(builder.build()))
|
||||
.await
|
||||
.unwrap();
|
||||
// Close the region.
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Close(RegionCloseRequest {}))
|
||||
.await
|
||||
.unwrap();
|
||||
// Create the same region id again.
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(builder.build()))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(engine.is_region_exists(region_id));
|
||||
|
||||
let region = engine.get_region(region_id).unwrap();
|
||||
|
||||
assert!(region.is_writable());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_engine_create_with_different_id() {
|
||||
let mut env = TestEnv::new();
|
||||
|
||||
@@ -17,7 +17,7 @@ use common_query::logical_plan::DfExpr;
|
||||
use common_query::prelude::Expr;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datafusion_common::ScalarValue;
|
||||
use datafusion_expr::lit;
|
||||
use datafusion_expr::{col, lit};
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::RegionRequest;
|
||||
use store_api::storage::{RegionId, ScanRequest};
|
||||
@@ -46,7 +46,7 @@ async fn check_prune_row_groups(expr: DfExpr, expected: &str) {
|
||||
region_id,
|
||||
Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(0, 10),
|
||||
rows: build_rows(0, 15),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
@@ -76,6 +76,16 @@ async fn test_read_parquet_stats() {
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 0 | 0.0 | 1970-01-01T00:00:00 |
|
||||
| 1 | 1.0 | 1970-01-01T00:00:01 |
|
||||
| 10 | 10.0 | 1970-01-01T00:00:10 |
|
||||
| 11 | 11.0 | 1970-01-01T00:00:11 |
|
||||
| 12 | 12.0 | 1970-01-01T00:00:12 |
|
||||
| 13 | 13.0 | 1970-01-01T00:00:13 |
|
||||
| 14 | 14.0 | 1970-01-01T00:00:14 |
|
||||
| 2 | 2.0 | 1970-01-01T00:00:02 |
|
||||
| 3 | 3.0 | 1970-01-01T00:00:03 |
|
||||
| 4 | 4.0 | 1970-01-01T00:00:04 |
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
| 6 | 6.0 | 1970-01-01T00:00:06 |
|
||||
| 7 | 7.0 | 1970-01-01T00:00:07 |
|
||||
@@ -84,7 +94,11 @@ async fn test_read_parquet_stats() {
|
||||
+-------+---------+---------------------+",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prune_tag() {
|
||||
// prune result: only row group 1&2
|
||||
check_prune_row_groups(
|
||||
datafusion_expr::col("tag_0").gt(lit(ScalarValue::Utf8(Some("4".to_string())))),
|
||||
"\
|
||||
@@ -100,3 +114,25 @@ async fn test_read_parquet_stats() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_prune_tag_and_field() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
// prune result: only row group 1
|
||||
check_prune_row_groups(
|
||||
col("tag_0")
|
||||
.gt(lit(ScalarValue::Utf8(Some("4".to_string()))))
|
||||
.and(col("field_0").lt(lit(8.0))),
|
||||
"\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
| 6 | 6.0 | 1970-01-01T00:00:06 |
|
||||
| 7 | 7.0 | 1970-01-01T00:00:07 |
|
||||
| 8 | 8.0 | 1970-01-01T00:00:08 |
|
||||
| 9 | 9.0 | 1970-01-01T00:00:09 |
|
||||
+-------+---------+---------------------+",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -154,6 +154,12 @@ impl RegionManifestManager {
|
||||
let inner = self.inner.read().await;
|
||||
inner.store.clone()
|
||||
}
|
||||
|
||||
/// Returns total manifest size.
|
||||
pub async fn manifest_size(&self) -> u64 {
|
||||
let inner = self.inner.read().await;
|
||||
inner.total_manifest_size()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -186,7 +192,7 @@ impl RegionManifestManagerInner {
|
||||
/// Creates a new manifest.
|
||||
async fn new(metadata: RegionMetadataRef, options: RegionManifestOptions) -> Result<Self> {
|
||||
// construct storage
|
||||
let store = ManifestObjectStore::new(
|
||||
let mut store = ManifestObjectStore::new(
|
||||
&options.manifest_dir,
|
||||
options.object_store.clone(),
|
||||
options.compress_type,
|
||||
@@ -232,7 +238,7 @@ impl RegionManifestManagerInner {
|
||||
/// Returns `Ok(None)` if no such manifest.
|
||||
async fn open(options: RegionManifestOptions) -> Result<Option<Self>> {
|
||||
// construct storage
|
||||
let store = ManifestObjectStore::new(
|
||||
let mut store = ManifestObjectStore::new(
|
||||
&options.manifest_dir,
|
||||
options.object_store.clone(),
|
||||
options.compress_type,
|
||||
@@ -240,8 +246,9 @@ impl RegionManifestManagerInner {
|
||||
|
||||
// recover from storage
|
||||
// construct manifest builder
|
||||
// calculate the manifest size from the latest checkpoint
|
||||
let mut version = MIN_VERSION;
|
||||
let checkpoint = Self::last_checkpoint(&store).await?;
|
||||
let checkpoint = Self::last_checkpoint(&mut store).await?;
|
||||
let last_checkpoint_version = checkpoint
|
||||
.as_ref()
|
||||
.map(|checkpoint| checkpoint.last_version)
|
||||
@@ -265,6 +272,8 @@ impl RegionManifestManagerInner {
|
||||
let mut action_iter = store.scan(version, MAX_VERSION).await?;
|
||||
while let Some((manifest_version, raw_action_list)) = action_iter.next_log().await? {
|
||||
let action_list = RegionMetaActionList::decode(&raw_action_list)?;
|
||||
// set manifest size after last checkpoint
|
||||
store.set_delta_file_size(manifest_version, raw_action_list.len() as u64);
|
||||
for action in action_list.actions {
|
||||
match action {
|
||||
RegionMetaAction::Change(action) => {
|
||||
@@ -312,6 +321,7 @@ impl RegionManifestManagerInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update the manifest. Return the current manifest version number.
|
||||
async fn update(&mut self, action_list: RegionMetaActionList) -> Result<ManifestVersion> {
|
||||
let version = self.increase_version();
|
||||
self.store.save(version, &action_list.encode()?).await?;
|
||||
@@ -343,6 +353,11 @@ impl RegionManifestManagerInner {
|
||||
|
||||
Ok(version)
|
||||
}
|
||||
|
||||
/// Returns total manifest size.
|
||||
pub(crate) fn total_manifest_size(&self) -> u64 {
|
||||
self.store.total_manifest_size()
|
||||
}
|
||||
}
|
||||
|
||||
impl RegionManifestManagerInner {
|
||||
@@ -369,8 +384,8 @@ impl RegionManifestManagerInner {
|
||||
}
|
||||
|
||||
/// Make a new checkpoint. Return the fresh one if there are some actions to compact.
|
||||
async fn do_checkpoint(&self) -> Result<Option<RegionCheckpoint>> {
|
||||
let last_checkpoint = Self::last_checkpoint(&self.store).await?;
|
||||
async fn do_checkpoint(&mut self) -> Result<Option<RegionCheckpoint>> {
|
||||
let last_checkpoint = Self::last_checkpoint(&mut self.store).await?;
|
||||
let current_version = self.last_version;
|
||||
|
||||
let (start_version, mut manifest_builder) = if let Some(checkpoint) = last_checkpoint {
|
||||
@@ -441,7 +456,7 @@ impl RegionManifestManagerInner {
|
||||
|
||||
/// Fetch the last [RegionCheckpoint] from storage.
|
||||
pub(crate) async fn last_checkpoint(
|
||||
store: &ManifestObjectStore,
|
||||
store: &mut ManifestObjectStore,
|
||||
) -> Result<Option<RegionCheckpoint>> {
|
||||
let last_checkpoint = store.load_last_checkpoint().await?;
|
||||
|
||||
@@ -456,14 +471,16 @@ impl RegionManifestManagerInner {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use common_datasource::compression::CompressionType;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
|
||||
|
||||
use super::*;
|
||||
use crate::manifest::action::RegionChange;
|
||||
use crate::manifest::action::{RegionChange, RegionEdit};
|
||||
use crate::manifest::tests::utils::basic_region_metadata;
|
||||
use crate::test_util::TestEnv;
|
||||
|
||||
@@ -546,4 +563,95 @@ mod test {
|
||||
.unwrap();
|
||||
manager.validate_manifest(&new_metadata, 1).await;
|
||||
}
|
||||
|
||||
/// Just for test, refer to wal_dir_usage in src/store-api/src/logstore.rs.
|
||||
async fn manifest_dir_usage(path: &str) -> u64 {
|
||||
let mut size = 0;
|
||||
let mut read_dir = tokio::fs::read_dir(path).await.unwrap();
|
||||
while let Ok(dir_entry) = read_dir.next_entry().await {
|
||||
let Some(entry) = dir_entry else {
|
||||
break;
|
||||
};
|
||||
if entry.file_type().await.unwrap().is_file() {
|
||||
let file_name = entry.file_name().into_string().unwrap();
|
||||
if file_name.contains(".checkpoint") || file_name.contains(".json") {
|
||||
let file_size = entry.metadata().await.unwrap().len() as usize;
|
||||
debug!("File: {file_name:?}, size: {file_size}");
|
||||
size += file_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
size as u64
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_manifest_size() {
|
||||
let metadata = Arc::new(basic_region_metadata());
|
||||
let data_home = create_temp_dir("");
|
||||
let data_home_path = data_home.path().to_str().unwrap().to_string();
|
||||
let env = TestEnv::with_data_home(data_home);
|
||||
|
||||
let manifest_dir = format!("{}/manifest", data_home_path);
|
||||
|
||||
let manager = env
|
||||
.create_manifest_manager(CompressionType::Uncompressed, 10, Some(metadata.clone()))
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let mut new_metadata_builder = RegionMetadataBuilder::from_existing((*metadata).clone());
|
||||
new_metadata_builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("val2", ConcreteDataType::float64_datatype(), false),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 252,
|
||||
});
|
||||
let new_metadata = Arc::new(new_metadata_builder.build().unwrap());
|
||||
|
||||
let action_list =
|
||||
RegionMetaActionList::with_action(RegionMetaAction::Change(RegionChange {
|
||||
metadata: new_metadata.clone(),
|
||||
}));
|
||||
|
||||
let current_version = manager.update(action_list).await.unwrap();
|
||||
assert_eq!(current_version, 1);
|
||||
manager.validate_manifest(&new_metadata, 1).await;
|
||||
|
||||
// get manifest size
|
||||
let manifest_size = manager.manifest_size().await;
|
||||
assert_eq!(manifest_size, manifest_dir_usage(&manifest_dir).await);
|
||||
|
||||
// update 10 times nop_action to trigger checkpoint
|
||||
for _ in 0..10 {
|
||||
manager
|
||||
.update(RegionMetaActionList::new(vec![RegionMetaAction::Edit(
|
||||
RegionEdit {
|
||||
files_to_add: vec![],
|
||||
files_to_remove: vec![],
|
||||
compaction_time_window: None,
|
||||
flushed_entry_id: None,
|
||||
flushed_sequence: None,
|
||||
},
|
||||
)]))
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// check manifest size again
|
||||
let manifest_size = manager.manifest_size().await;
|
||||
assert_eq!(manifest_size, manifest_dir_usage(&manifest_dir).await);
|
||||
|
||||
// Reopen the manager,
|
||||
// we just calculate the size from the latest checkpoint file
|
||||
manager.stop().await.unwrap();
|
||||
let manager = env
|
||||
.create_manifest_manager(CompressionType::Uncompressed, 10, None)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
manager.validate_manifest(&new_metadata, 11).await;
|
||||
|
||||
// get manifest size again
|
||||
let manifest_size = manager.manifest_size().await;
|
||||
assert_eq!(manifest_size, 1312);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,11 +129,22 @@ impl ObjectStoreLogIterator {
|
||||
}
|
||||
}
|
||||
|
||||
/// Key to identify a manifest file.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
|
||||
enum FileKey {
|
||||
/// A delta file (`.json`).
|
||||
Delta(ManifestVersion),
|
||||
/// A checkpoint file (`.checkpoint`).
|
||||
Checkpoint(ManifestVersion),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ManifestObjectStore {
|
||||
object_store: ObjectStore,
|
||||
compress_type: CompressionType,
|
||||
path: String,
|
||||
/// Stores the size of each manifest file.
|
||||
manifest_size_map: HashMap<FileKey, u64>,
|
||||
}
|
||||
|
||||
impl ManifestObjectStore {
|
||||
@@ -142,6 +153,7 @@ impl ManifestObjectStore {
|
||||
object_store,
|
||||
compress_type,
|
||||
path: util::normalize_dir(path),
|
||||
manifest_size_map: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,6 +196,7 @@ impl ManifestObjectStore {
|
||||
.context(OpenDalSnafu)
|
||||
}
|
||||
|
||||
/// Scan the manifest files in the range of [start, end) and return the iterator.
|
||||
pub async fn scan(
|
||||
&self,
|
||||
start: ManifestVersion,
|
||||
@@ -212,8 +225,12 @@ impl ManifestObjectStore {
|
||||
})
|
||||
}
|
||||
|
||||
/// Delete manifest files that version < end.
|
||||
/// If keep_last_checkpoint is true, the last checkpoint file will be kept.
|
||||
/// ### Return
|
||||
/// The number of deleted files.
|
||||
pub async fn delete_until(
|
||||
&self,
|
||||
&mut self,
|
||||
end: ManifestVersion,
|
||||
keep_last_checkpoint: bool,
|
||||
) -> Result<usize> {
|
||||
@@ -248,7 +265,7 @@ impl ManifestObjectStore {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let paths: Vec<_> = entries
|
||||
let del_entries: Vec<_> = entries
|
||||
.iter()
|
||||
.filter(|(_e, is_checkpoint, version)| {
|
||||
if let Some(max_version) = checkpoint_version {
|
||||
@@ -264,12 +281,15 @@ impl ManifestObjectStore {
|
||||
true
|
||||
}
|
||||
})
|
||||
.map(|e| e.0.path().to_string())
|
||||
.collect();
|
||||
let paths = del_entries
|
||||
.iter()
|
||||
.map(|(e, _, _)| e.path().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
let ret = paths.len();
|
||||
|
||||
debug!(
|
||||
"Deleting {} logs from manifest storage path {} until {}, checkpoint: {:?}, paths: {:?}",
|
||||
"Deleting {} logs from manifest storage path {} until {}, checkpoint_version: {:?}, paths: {:?}",
|
||||
ret,
|
||||
self.path,
|
||||
end,
|
||||
@@ -282,10 +302,21 @@ impl ManifestObjectStore {
|
||||
.await
|
||||
.context(OpenDalSnafu)?;
|
||||
|
||||
// delete manifest sizes
|
||||
for (_, is_checkpoint, version) in &del_entries {
|
||||
if *is_checkpoint {
|
||||
self.manifest_size_map
|
||||
.remove(&FileKey::Checkpoint(*version));
|
||||
} else {
|
||||
self.manifest_size_map.remove(&FileKey::Delta(*version));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
pub async fn save(&self, version: ManifestVersion, bytes: &[u8]) -> Result<()> {
|
||||
/// Save the delta manifest file.
|
||||
pub async fn save(&mut self, version: ManifestVersion, bytes: &[u8]) -> Result<()> {
|
||||
let path = self.delta_file_path(version);
|
||||
debug!("Save log to manifest storage, version: {}", version);
|
||||
let data = self
|
||||
@@ -296,13 +327,17 @@ impl ManifestObjectStore {
|
||||
compress_type: self.compress_type,
|
||||
path: &path,
|
||||
})?;
|
||||
let delta_size = data.len();
|
||||
self.object_store
|
||||
.write(&path, data)
|
||||
.await
|
||||
.context(OpenDalSnafu)
|
||||
.context(OpenDalSnafu)?;
|
||||
self.set_delta_file_size(version, delta_size as u64);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn save_checkpoint(&self, version: ManifestVersion, bytes: &[u8]) -> Result<()> {
|
||||
/// Save the checkpoint manifest file.
|
||||
pub async fn save_checkpoint(&mut self, version: ManifestVersion, bytes: &[u8]) -> Result<()> {
|
||||
let path = self.checkpoint_file_path(version);
|
||||
let data = self
|
||||
.compress_type
|
||||
@@ -312,10 +347,12 @@ impl ManifestObjectStore {
|
||||
compress_type: self.compress_type,
|
||||
path: &path,
|
||||
})?;
|
||||
let checkpoint_size = data.len();
|
||||
self.object_store
|
||||
.write(&path, data)
|
||||
.await
|
||||
.context(OpenDalSnafu)?;
|
||||
self.set_checkpoint_file_size(version, checkpoint_size as u64);
|
||||
|
||||
// Because last checkpoint file only contain size and version, which is tiny, so we don't compress it.
|
||||
let last_checkpoint_path = self.last_checkpoint_path();
|
||||
@@ -342,7 +379,7 @@ impl ManifestObjectStore {
|
||||
}
|
||||
|
||||
pub async fn load_checkpoint(
|
||||
&self,
|
||||
&mut self,
|
||||
version: ManifestVersion,
|
||||
) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
|
||||
let path = self.checkpoint_file_path(version);
|
||||
@@ -351,12 +388,15 @@ impl ManifestObjectStore {
|
||||
let checkpoint_data =
|
||||
match self.object_store.read(&path).await {
|
||||
Ok(checkpoint) => {
|
||||
let checkpoint_size = checkpoint.len();
|
||||
let decompress_data = self.compress_type.decode(checkpoint).await.context(
|
||||
DecompressObjectSnafu {
|
||||
compress_type: self.compress_type,
|
||||
path,
|
||||
},
|
||||
)?;
|
||||
// set the checkpoint size
|
||||
self.set_checkpoint_file_size(version, checkpoint_size as u64);
|
||||
Ok(Some(decompress_data))
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -373,6 +413,7 @@ impl ManifestObjectStore {
|
||||
);
|
||||
match self.object_store.read(&fall_back_path).await {
|
||||
Ok(checkpoint) => {
|
||||
let checkpoint_size = checkpoint.len();
|
||||
let decompress_data = FALL_BACK_COMPRESS_TYPE
|
||||
.decode(checkpoint)
|
||||
.await
|
||||
@@ -380,6 +421,7 @@ impl ManifestObjectStore {
|
||||
compress_type: FALL_BACK_COMPRESS_TYPE,
|
||||
path,
|
||||
})?;
|
||||
self.set_checkpoint_file_size(version, checkpoint_size as u64);
|
||||
Ok(Some(decompress_data))
|
||||
}
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
|
||||
@@ -398,7 +440,7 @@ impl ManifestObjectStore {
|
||||
|
||||
/// Load the latest checkpoint.
|
||||
/// Return manifest version and the raw [RegionCheckpoint](crate::manifest::action::RegionCheckpoint) content if any
|
||||
pub async fn load_last_checkpoint(&self) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
|
||||
pub async fn load_last_checkpoint(&mut self) -> Result<Option<(ManifestVersion, Vec<u8>)>> {
|
||||
let last_checkpoint_path = self.last_checkpoint_path();
|
||||
let last_checkpoint_data = match self.object_store.read(&last_checkpoint_path).await {
|
||||
Ok(data) => data,
|
||||
@@ -424,6 +466,22 @@ impl ManifestObjectStore {
|
||||
pub async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
|
||||
self.object_store.read(path).await.context(OpenDalSnafu)
|
||||
}
|
||||
|
||||
/// Compute the size(Byte) in manifest size map.
|
||||
pub(crate) fn total_manifest_size(&self) -> u64 {
|
||||
self.manifest_size_map.values().sum()
|
||||
}
|
||||
|
||||
/// Set the size of the delta file by delta version.
|
||||
pub(crate) fn set_delta_file_size(&mut self, version: ManifestVersion, size: u64) {
|
||||
self.manifest_size_map.insert(FileKey::Delta(version), size);
|
||||
}
|
||||
|
||||
/// Set the size of the checkpoint file by checkpoint version.
|
||||
pub(crate) fn set_checkpoint_file_size(&mut self, version: ManifestVersion, size: u64) {
|
||||
self.manifest_size_map
|
||||
.insert(FileKey::Checkpoint(version), size);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -489,7 +547,7 @@ mod tests {
|
||||
test_manifest_log_store_case(log_store).await;
|
||||
}
|
||||
|
||||
async fn test_manifest_log_store_case(log_store: ManifestObjectStore) {
|
||||
async fn test_manifest_log_store_case(mut log_store: ManifestObjectStore) {
|
||||
for v in 0..5 {
|
||||
log_store
|
||||
.save(v, format!("hello, {v}").as_bytes())
|
||||
@@ -600,4 +658,92 @@ mod tests {
|
||||
let mut it = log_store.scan(0, 10).await.unwrap();
|
||||
assert!(it.next_log().await.unwrap().is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_file_version() {
|
||||
let version = file_version("00000000000000000007.checkpoint");
|
||||
assert_eq!(version, 7);
|
||||
|
||||
let name = delta_file(version);
|
||||
assert_eq!(name, "00000000000000000007.json");
|
||||
|
||||
let name = checkpoint_file(version);
|
||||
assert_eq!(name, "00000000000000000007.checkpoint");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_uncompressed_manifest_files_size() {
|
||||
let mut log_store = new_test_manifest_store();
|
||||
// write 5 manifest files with uncompressed(8B per file)
|
||||
log_store.compress_type = CompressionType::Uncompressed;
|
||||
for v in 0..5 {
|
||||
log_store
|
||||
.save(v, format!("hello, {v}").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
// write 1 checkpoint file with uncompressed(23B)
|
||||
log_store
|
||||
.save_checkpoint(5, "checkpoint_uncompressed".as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// manifest files size
|
||||
assert_eq!(log_store.total_manifest_size(), 63);
|
||||
|
||||
// delete 3 manifest files
|
||||
assert_eq!(log_store.delete_until(3, false).await.unwrap(), 3);
|
||||
|
||||
// manifest files size after delete
|
||||
assert_eq!(log_store.total_manifest_size(), 39);
|
||||
|
||||
// delete all manifest files
|
||||
assert_eq!(
|
||||
log_store
|
||||
.delete_until(ManifestVersion::MAX, false)
|
||||
.await
|
||||
.unwrap(),
|
||||
3
|
||||
);
|
||||
|
||||
assert_eq!(log_store.total_manifest_size(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compressed_manifest_files_size() {
|
||||
let mut log_store = new_test_manifest_store();
|
||||
// Test with compressed manifest files
|
||||
log_store.compress_type = CompressionType::Gzip;
|
||||
// write 5 manifest files
|
||||
for v in 0..5 {
|
||||
log_store
|
||||
.save(v, format!("hello, {v}").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
log_store
|
||||
.save_checkpoint(5, "checkpoint_compressed".as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// manifest files size
|
||||
assert_eq!(log_store.total_manifest_size(), 181);
|
||||
|
||||
// delete 3 manifest files
|
||||
assert_eq!(log_store.delete_until(3, false).await.unwrap(), 3);
|
||||
|
||||
// manifest files size after delete
|
||||
assert_eq!(log_store.total_manifest_size(), 97);
|
||||
|
||||
// delete all manifest files
|
||||
assert_eq!(
|
||||
log_store
|
||||
.delete_until(ManifestVersion::MAX, false)
|
||||
.await
|
||||
.unwrap(),
|
||||
3
|
||||
);
|
||||
|
||||
assert_eq!(log_store.total_manifest_size(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,7 +202,7 @@ async fn generate_checkpoint_with_compression_types(
|
||||
manager.update(action).await.unwrap();
|
||||
}
|
||||
|
||||
RegionManifestManagerInner::last_checkpoint(&manager.store().await)
|
||||
RegionManifestManagerInner::last_checkpoint(&mut manager.store().await)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
|
||||
@@ -20,8 +20,11 @@ use std::sync::{Arc, RwLock};
|
||||
|
||||
use api::v1::OpType;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::physical_plan::PhysicalExpr;
|
||||
use datafusion_common::ScalarValue;
|
||||
use datafusion_expr::ColumnarValue;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::ArrayRef;
|
||||
use datatypes::arrow::array::{ArrayRef, BooleanArray};
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector, VectorRef};
|
||||
@@ -300,12 +303,16 @@ impl SeriesSet {
|
||||
let (primary_key_builders, primary_key_schema) =
|
||||
primary_key_builders(&self.region_metadata, 1);
|
||||
|
||||
let physical_exprs: Vec<_> = predicate
|
||||
.and_then(|p| p.to_physical_exprs(&primary_key_schema).ok())
|
||||
.unwrap_or_default();
|
||||
|
||||
Iter {
|
||||
metadata: self.region_metadata.clone(),
|
||||
series: self.series.clone(),
|
||||
projection,
|
||||
last_key: None,
|
||||
predicate,
|
||||
predicate: physical_exprs,
|
||||
pk_schema: primary_key_schema,
|
||||
primary_key_builders,
|
||||
codec: self.codec.clone(),
|
||||
@@ -341,7 +348,7 @@ struct Iter {
|
||||
series: Arc<SeriesRwLockMap>,
|
||||
projection: HashSet<ColumnId>,
|
||||
last_key: Option<Vec<u8>>,
|
||||
predicate: Option<Predicate>,
|
||||
predicate: Vec<Arc<dyn PhysicalExpr>>,
|
||||
pk_schema: arrow::datatypes::SchemaRef,
|
||||
primary_key_builders: Vec<Box<dyn MutableVector>>,
|
||||
codec: Arc<McmpRowCodec>,
|
||||
@@ -362,18 +369,18 @@ impl Iterator for Iter {
|
||||
// TODO(hl): maybe yield more than one time series to amortize range overhead.
|
||||
for (primary_key, series) in range {
|
||||
let mut series = series.write().unwrap();
|
||||
if let Some(predicate) = &self.predicate {
|
||||
if !prune_primary_key(
|
||||
if !self.predicate.is_empty()
|
||||
&& !prune_primary_key(
|
||||
&self.codec,
|
||||
primary_key.as_slice(),
|
||||
&mut series,
|
||||
&mut self.primary_key_builders,
|
||||
self.pk_schema.clone(),
|
||||
predicate,
|
||||
) {
|
||||
// read next series
|
||||
continue;
|
||||
}
|
||||
&self.predicate,
|
||||
)
|
||||
{
|
||||
// read next series
|
||||
continue;
|
||||
}
|
||||
self.last_key = Some(primary_key.clone());
|
||||
|
||||
@@ -392,7 +399,7 @@ fn prune_primary_key(
|
||||
series: &mut Series,
|
||||
builders: &mut Vec<Box<dyn MutableVector>>,
|
||||
pk_schema: arrow::datatypes::SchemaRef,
|
||||
predicate: &Predicate,
|
||||
predicate: &[Arc<dyn PhysicalExpr>],
|
||||
) -> bool {
|
||||
// no primary key, we simply return true.
|
||||
if pk_schema.fields().is_empty() {
|
||||
@@ -400,20 +407,52 @@ fn prune_primary_key(
|
||||
}
|
||||
|
||||
if let Some(rb) = series.pk_cache.as_ref() {
|
||||
let res = predicate.prune_primary_key(rb).unwrap_or(true);
|
||||
let res = prune_inner(predicate, rb).unwrap_or(true);
|
||||
debug!("Prune primary key: {:?}, res: {:?}", rb, res);
|
||||
res
|
||||
} else {
|
||||
let Ok(rb) = pk_to_record_batch(codec, pk, builders, pk_schema) else {
|
||||
return true;
|
||||
};
|
||||
let res = predicate.prune_primary_key(&rb).unwrap_or(true);
|
||||
let res = prune_inner(predicate, &rb).unwrap_or(true);
|
||||
debug!("Prune primary key: {:?}, res: {:?}", rb, res);
|
||||
series.update_pk_cache(rb);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
fn prune_inner(predicates: &[Arc<dyn PhysicalExpr>], primary_key: &RecordBatch) -> Result<bool> {
|
||||
for expr in predicates {
|
||||
// evaluate every filter against primary key
|
||||
let Ok(eva) = expr.evaluate(primary_key) else {
|
||||
continue;
|
||||
};
|
||||
let result = match eva {
|
||||
ColumnarValue::Array(array) => {
|
||||
let predicate_array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
|
||||
predicate_array
|
||||
.into_iter()
|
||||
.map(|x| x.unwrap_or(true))
|
||||
.next()
|
||||
.unwrap_or(true)
|
||||
}
|
||||
// result was a column
|
||||
ColumnarValue::Scalar(ScalarValue::Boolean(v)) => v.unwrap_or(true),
|
||||
_ => {
|
||||
unreachable!("Unexpected primary key record batch evaluation result: {:?}, primary key: {:?}", eva, primary_key);
|
||||
}
|
||||
};
|
||||
debug!(
|
||||
"Evaluate primary key {:?} against filter: {:?}, result: {:?}",
|
||||
primary_key, expr, result
|
||||
);
|
||||
if !result {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn pk_to_record_batch(
|
||||
codec: &Arc<McmpRowCodec>,
|
||||
bytes: &[u8],
|
||||
|
||||
@@ -17,13 +17,12 @@
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use common_time::range::TimestampRange;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::ScanRequest;
|
||||
use table::predicate::{Predicate, TimeRangePredicateBuilder};
|
||||
|
||||
use crate::access_layer::AccessLayerRef;
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::error::{BuildPredicateSnafu, Result};
|
||||
use crate::error::Result;
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::seq_scan::SeqScan;
|
||||
use crate::region::version::VersionRef;
|
||||
@@ -173,11 +172,7 @@ impl ScanRegion {
|
||||
total_ssts
|
||||
);
|
||||
|
||||
let predicate = Predicate::try_new(
|
||||
self.request.filters.clone(),
|
||||
self.version.metadata.schema.clone(),
|
||||
)
|
||||
.context(BuildPredicateSnafu)?;
|
||||
let predicate = Predicate::new(self.request.filters.clone());
|
||||
let mapper = match &self.request.projection {
|
||||
Some(p) => ProjectionMapper::new(&self.version.metadata, p.iter().copied())?,
|
||||
None => ProjectionMapper::all(&self.version.metadata)?,
|
||||
|
||||
@@ -119,7 +119,8 @@ impl RegionOpener {
|
||||
&expect.column_metadatas,
|
||||
&expect.primary_key,
|
||||
)?;
|
||||
|
||||
// To keep consistence with Create behavior, set the opened Region writable.
|
||||
region.set_writable(true);
|
||||
return Ok(region);
|
||||
}
|
||||
Ok(None) => {
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
mod format;
|
||||
pub mod reader;
|
||||
pub mod row_group;
|
||||
mod stats;
|
||||
pub mod writer;
|
||||
|
||||
|
||||
@@ -188,8 +188,9 @@ impl ParquetReaderBuilder {
|
||||
&read_format,
|
||||
column_ids,
|
||||
);
|
||||
|
||||
let pruned_row_groups = predicate
|
||||
.prune_with_stats(&stats)
|
||||
.prune_with_stats(&stats, read_format.metadata().schema.arrow_schema())
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, valid)| if valid { Some(idx) } else { None })
|
||||
|
||||
230
src/mito2/src/sst/parquet/row_group.rs
Normal file
230
src/mito2/src/sst/parquet/row_group.rs
Normal file
@@ -0,0 +1,230 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Ports private structs from [parquet crate](https://github.com/apache/arrow-rs/blob/7e134f4d277c0b62c27529fc15a4739de3ad0afd/parquet/src/arrow/async_reader/mod.rs#L644-L650).
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::{Buf, Bytes};
|
||||
use parquet::arrow::arrow_reader::{RowGroups, RowSelection};
|
||||
use parquet::arrow::async_reader::AsyncFileReader;
|
||||
use parquet::arrow::ProjectionMask;
|
||||
use parquet::column::page::{PageIterator, PageReader};
|
||||
use parquet::errors::{ParquetError, Result};
|
||||
use parquet::file::metadata::RowGroupMetaData;
|
||||
use parquet::file::reader::{ChunkReader, Length};
|
||||
use parquet::file::serialized_reader::SerializedPageReader;
|
||||
use parquet::format::PageLocation;
|
||||
|
||||
/// An in-memory collection of column chunks
|
||||
pub struct InMemoryRowGroup<'a> {
|
||||
metadata: &'a RowGroupMetaData,
|
||||
page_locations: Option<&'a [Vec<PageLocation>]>,
|
||||
column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
|
||||
row_count: usize,
|
||||
}
|
||||
|
||||
impl<'a> InMemoryRowGroup<'a> {
|
||||
/// Fetches the necessary column data into memory
|
||||
// TODO(yingwen): Fix clippy warnings.
|
||||
#[allow(clippy::filter_map_bool_then)]
|
||||
#[allow(clippy::useless_conversion)]
|
||||
pub async fn fetch<T: AsyncFileReader + Send>(
|
||||
&mut self,
|
||||
input: &mut T,
|
||||
projection: &ProjectionMask,
|
||||
selection: Option<&RowSelection>,
|
||||
) -> Result<()> {
|
||||
if let Some((selection, page_locations)) = selection.zip(self.page_locations) {
|
||||
// If we have a `RowSelection` and an `OffsetIndex` then only fetch pages required for the
|
||||
// `RowSelection`
|
||||
let mut page_start_offsets: Vec<Vec<usize>> = vec![];
|
||||
|
||||
let fetch_ranges = self
|
||||
.column_chunks
|
||||
.iter()
|
||||
.zip(self.metadata.columns())
|
||||
.enumerate()
|
||||
.filter_map(|(idx, (chunk, chunk_meta))| {
|
||||
(chunk.is_none() && projection.leaf_included(idx)).then(|| {
|
||||
// If the first page does not start at the beginning of the column,
|
||||
// then we need to also fetch a dictionary page.
|
||||
let mut ranges = vec![];
|
||||
let (start, _len) = chunk_meta.byte_range();
|
||||
match page_locations[idx].first() {
|
||||
Some(first) if first.offset as u64 != start => {
|
||||
ranges.push(start as usize..first.offset as usize);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
ranges.extend(selection.scan_ranges(&page_locations[idx]));
|
||||
page_start_offsets.push(ranges.iter().map(|range| range.start).collect());
|
||||
|
||||
ranges
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
.collect();
|
||||
|
||||
let mut chunk_data = input.get_byte_ranges(fetch_ranges).await?.into_iter();
|
||||
let mut page_start_offsets = page_start_offsets.into_iter();
|
||||
|
||||
for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
|
||||
if chunk.is_some() || !projection.leaf_included(idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(offsets) = page_start_offsets.next() {
|
||||
let mut chunks = Vec::with_capacity(offsets.len());
|
||||
for _ in 0..offsets.len() {
|
||||
chunks.push(chunk_data.next().unwrap());
|
||||
}
|
||||
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Sparse {
|
||||
length: self.metadata.column(idx).byte_range().1 as usize,
|
||||
data: offsets.into_iter().zip(chunks.into_iter()).collect(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let fetch_ranges = self
|
||||
.column_chunks
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, chunk)| {
|
||||
(chunk.is_none() && projection.leaf_included(idx)).then(|| {
|
||||
let column = self.metadata.column(idx);
|
||||
let (start, length) = column.byte_range();
|
||||
start as usize..(start + length) as usize
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut chunk_data = input.get_byte_ranges(fetch_ranges).await?.into_iter();
|
||||
|
||||
for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
|
||||
if chunk.is_some() || !projection.leaf_included(idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(data) = chunk_data.next() {
|
||||
*chunk = Some(Arc::new(ColumnChunkData::Dense {
|
||||
offset: self.metadata.column(idx).byte_range().0 as usize,
|
||||
data,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> RowGroups for InMemoryRowGroup<'a> {
|
||||
fn num_rows(&self) -> usize {
|
||||
self.row_count
|
||||
}
|
||||
|
||||
fn column_chunks(&self, i: usize) -> Result<Box<dyn PageIterator>> {
|
||||
match &self.column_chunks[i] {
|
||||
None => Err(ParquetError::General(format!(
|
||||
"Invalid column index {i}, column was not fetched"
|
||||
))),
|
||||
Some(data) => {
|
||||
let page_locations = self.page_locations.map(|index| index[i].clone());
|
||||
let page_reader: Box<dyn PageReader> = Box::new(SerializedPageReader::new(
|
||||
data.clone(),
|
||||
self.metadata.column(i),
|
||||
self.row_count,
|
||||
page_locations,
|
||||
)?);
|
||||
|
||||
Ok(Box::new(ColumnChunkIterator {
|
||||
reader: Some(Ok(page_reader)),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An in-memory column chunk
|
||||
#[derive(Clone)]
|
||||
enum ColumnChunkData {
|
||||
/// Column chunk data representing only a subset of data pages
|
||||
Sparse {
|
||||
/// Length of the full column chunk
|
||||
length: usize,
|
||||
/// Set of data pages included in this sparse chunk. Each element is a tuple
|
||||
/// of (page offset, page data)
|
||||
data: Vec<(usize, Bytes)>,
|
||||
},
|
||||
/// Full column chunk and its offset
|
||||
Dense { offset: usize, data: Bytes },
|
||||
}
|
||||
|
||||
impl ColumnChunkData {
|
||||
fn get(&self, start: u64) -> Result<Bytes> {
|
||||
match &self {
|
||||
ColumnChunkData::Sparse { data, .. } => data
|
||||
.binary_search_by_key(&start, |(offset, _)| *offset as u64)
|
||||
.map(|idx| data[idx].1.clone())
|
||||
.map_err(|_| {
|
||||
ParquetError::General(format!(
|
||||
"Invalid offset in sparse column chunk data: {start}"
|
||||
))
|
||||
}),
|
||||
ColumnChunkData::Dense { offset, data } => {
|
||||
let start = start as usize - *offset;
|
||||
Ok(data.slice(start..))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Length for ColumnChunkData {
|
||||
fn len(&self) -> u64 {
|
||||
match &self {
|
||||
ColumnChunkData::Sparse { length, .. } => *length as u64,
|
||||
ColumnChunkData::Dense { data, .. } => data.len() as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ChunkReader for ColumnChunkData {
|
||||
type T = bytes::buf::Reader<Bytes>;
|
||||
|
||||
fn get_read(&self, start: u64) -> Result<Self::T> {
|
||||
Ok(self.get(start)?.reader())
|
||||
}
|
||||
|
||||
fn get_bytes(&self, start: u64, length: usize) -> Result<Bytes> {
|
||||
Ok(self.get(start)?.slice(..length))
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements [`PageIterator`] for a single column chunk, yielding a single [`PageReader`]
|
||||
struct ColumnChunkIterator {
|
||||
reader: Option<Result<Box<dyn PageReader>>>,
|
||||
}
|
||||
|
||||
impl Iterator for ColumnChunkIterator {
|
||||
type Item = Result<Box<dyn PageReader>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.reader.take()
|
||||
}
|
||||
}
|
||||
|
||||
impl PageIterator for ColumnChunkIterator {}
|
||||
@@ -99,6 +99,15 @@ impl TestEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a new env with specific `data_home` for test.
|
||||
pub fn with_data_home(data_home: TempDir) -> TestEnv {
|
||||
TestEnv {
|
||||
data_home,
|
||||
logstore: None,
|
||||
object_store: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_logstore(&self) -> Option<Arc<RaftEngineLogStore>> {
|
||||
self.logstore.clone()
|
||||
}
|
||||
|
||||
@@ -7,6 +7,8 @@ license.workspace = true
|
||||
[dependencies]
|
||||
async-trait = "0.1"
|
||||
bytes = "1.4"
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
futures.workspace = true
|
||||
@@ -17,6 +19,7 @@ opendal = { version = "0.40", features = [
|
||||
"layers-tracing",
|
||||
"layers-metrics",
|
||||
] }
|
||||
snafu.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
45
src/object-store/src/error.rs
Normal file
45
src/object-store/src/error.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("Default storage not found: {}", default_object_store))]
|
||||
DefaultStorageNotFound {
|
||||
location: Location,
|
||||
default_object_store: String,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::DefaultStorageNotFound { .. } => StatusCode::InvalidArguments,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,9 @@ pub use opendal::{
|
||||
Operator as ObjectStore, Reader, Result, Writer,
|
||||
};
|
||||
|
||||
pub mod error;
|
||||
pub mod layers;
|
||||
pub mod manager;
|
||||
mod metrics;
|
||||
pub mod test_util;
|
||||
pub mod util;
|
||||
|
||||
107
src/object-store/src/manager.rs
Normal file
107
src/object-store/src/manager.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{DefaultStorageNotFoundSnafu, Result};
|
||||
use crate::ObjectStore;
|
||||
|
||||
/// Manages multiple object stores so that users can configure a storage for each table.
|
||||
/// This struct certainly have one default object store, and can have zero or more custom object stores.
|
||||
pub struct ObjectStoreManager {
|
||||
stores: HashMap<String, ObjectStore>,
|
||||
default_object_store: ObjectStore,
|
||||
}
|
||||
|
||||
impl ObjectStoreManager {
|
||||
/// Creates a new manager with specific object stores. Returns an error if `stores` doesn't contain the default object store.
|
||||
pub fn try_new(
|
||||
stores: HashMap<String, ObjectStore>,
|
||||
default_object_store: &str,
|
||||
) -> Result<Self> {
|
||||
let default_object_store = stores
|
||||
.get(default_object_store)
|
||||
.context(DefaultStorageNotFoundSnafu {
|
||||
default_object_store,
|
||||
})?
|
||||
.clone();
|
||||
Ok(ObjectStoreManager {
|
||||
stores,
|
||||
default_object_store,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find(&self, name: &str) -> Option<&ObjectStore> {
|
||||
self.stores.get(name)
|
||||
}
|
||||
|
||||
pub fn default_object_store(&self) -> &ObjectStore {
|
||||
&self.default_object_store
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
|
||||
use super::ObjectStoreManager;
|
||||
use crate::error::Error;
|
||||
use crate::services::Fs as Builder;
|
||||
use crate::ObjectStore;
|
||||
|
||||
fn new_object_store(dir: &TempDir) -> ObjectStore {
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
let mut builder = Builder::default();
|
||||
let _ = builder.root(store_dir);
|
||||
ObjectStore::new(builder).unwrap().finish()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_new_returns_err_when_global_store_not_exist() {
|
||||
let dir = create_temp_dir("new");
|
||||
let object_store = new_object_store(&dir);
|
||||
let stores: HashMap<String, ObjectStore> = vec![
|
||||
("File".to_string(), object_store.clone()),
|
||||
("S3".to_string(), object_store.clone()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
assert!(matches!(
|
||||
ObjectStoreManager::try_new(stores, "Gcs"),
|
||||
Err(Error::DefaultStorageNotFound { .. })
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_new_returns_ok() {
|
||||
let dir = create_temp_dir("new");
|
||||
let object_store = new_object_store(&dir);
|
||||
let stores: HashMap<String, ObjectStore> = vec![
|
||||
("File".to_string(), object_store.clone()),
|
||||
("S3".to_string(), object_store.clone()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let object_store_manager = ObjectStoreManager::try_new(stores, "File").unwrap();
|
||||
assert_eq!(object_store_manager.stores.len(), 2);
|
||||
assert!(object_store_manager.find("File").is_some());
|
||||
assert!(object_store_manager.find("S3").is_some());
|
||||
assert!(object_store_manager.find("Gcs").is_none());
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,7 @@ catalog = { workspace = true }
|
||||
common-catalog = { workspace = true }
|
||||
common-error = { workspace = true }
|
||||
common-macro = { workspace = true }
|
||||
common-recordbatch = { workspace = true }
|
||||
common-telemetry = { workspace = true }
|
||||
datafusion.workspace = true
|
||||
datatypes = { workspace = true }
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod empty_metric;
|
||||
mod histogram_fold;
|
||||
mod instant_manipulate;
|
||||
mod normalize;
|
||||
mod planner;
|
||||
|
||||
798
src/promql/src/extension_plan/histogram_fold.rs
Normal file
798
src/promql/src/extension_plan/histogram_fold.rs
Normal file
@@ -0,0 +1,798 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::task::Poll;
|
||||
use std::time::Instant;
|
||||
|
||||
use common_recordbatch::RecordBatch as GtRecordBatch;
|
||||
use common_telemetry::warn;
|
||||
use datafusion::arrow::array::AsArray;
|
||||
use datafusion::arrow::compute::{self, concat_batches, SortOptions};
|
||||
use datafusion::arrow::datatypes::{DataType, Field, Float64Type, SchemaRef};
|
||||
use datafusion::arrow::record_batch::RecordBatch;
|
||||
use datafusion::common::{DFField, DFSchema, DFSchemaRef};
|
||||
use datafusion::error::{DataFusionError, Result as DataFusionResult};
|
||||
use datafusion::execution::TaskContext;
|
||||
use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
|
||||
use datafusion::physical_expr::{PhysicalSortExpr, PhysicalSortRequirement};
|
||||
use datafusion::physical_plan::expressions::Column as PhyColumn;
|
||||
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
|
||||
use datafusion::physical_plan::{
|
||||
DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr,
|
||||
RecordBatchStream, SendableRecordBatchStream, Statistics,
|
||||
};
|
||||
use datafusion::prelude::{Column, Expr};
|
||||
use datatypes::prelude::{ConcreteDataType, DataType as GtDataType};
|
||||
use datatypes::schema::Schema as GtSchema;
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use datatypes::vectors::MutableVector;
|
||||
use futures::{ready, Stream, StreamExt};
|
||||
|
||||
/// `HistogramFold` will fold the conventional (non-native) histogram ([1]) for later
|
||||
/// computing. Specifically, it will transform the `le` and `field` column into a complex
|
||||
/// type, and samples on other tag columns:
|
||||
/// - `le` will become a [ListArray] of [f64]. With each bucket bound parsed
|
||||
/// - `field` will become a [ListArray] of [f64]
|
||||
/// - other columns will be sampled every `bucket_num` element, but their types won't change.
|
||||
///
|
||||
/// Due to the folding or sampling, the output rows number will become `input_rows` / `bucket_num`.
|
||||
///
|
||||
/// # Requirement
|
||||
/// - Input should be sorted on `<tag list>, le ASC, ts`.
|
||||
/// - The value set of `le` should be same. I.e., buckets of every series should be same.
|
||||
///
|
||||
/// [1]: https://prometheus.io/docs/concepts/metric_types/#histogram
|
||||
#[derive(Debug, PartialEq, Eq, Hash)]
|
||||
pub struct HistogramFold {
|
||||
/// Name of the `le` column. It's a special column in prometheus
|
||||
/// for implementing conventional histogram. It's a string column
|
||||
/// with "literal" float value, like "+Inf", "0.001" etc.
|
||||
le_column: String,
|
||||
ts_column: String,
|
||||
input: LogicalPlan,
|
||||
field_column: String,
|
||||
output_schema: DFSchemaRef,
|
||||
}
|
||||
|
||||
impl UserDefinedLogicalNodeCore for HistogramFold {
|
||||
fn name(&self) -> &str {
|
||||
Self::name()
|
||||
}
|
||||
|
||||
fn inputs(&self) -> Vec<&LogicalPlan> {
|
||||
vec![&self.input]
|
||||
}
|
||||
|
||||
fn schema(&self) -> &DFSchemaRef {
|
||||
&self.output_schema
|
||||
}
|
||||
|
||||
fn expressions(&self) -> Vec<Expr> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
fn fmt_for_explain(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"HistogramFold: le={}, field={}",
|
||||
self.le_column, self.field_column
|
||||
)
|
||||
}
|
||||
|
||||
fn from_template(&self, _exprs: &[Expr], inputs: &[LogicalPlan]) -> Self {
|
||||
Self {
|
||||
le_column: self.le_column.clone(),
|
||||
ts_column: self.ts_column.clone(),
|
||||
input: inputs[0].clone(),
|
||||
field_column: self.field_column.clone(),
|
||||
// This method cannot return error. Otherwise we should re-calculate
|
||||
// the output schema
|
||||
output_schema: self.output_schema.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HistogramFold {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(
|
||||
le_column: String,
|
||||
field_column: String,
|
||||
ts_column: String,
|
||||
input: LogicalPlan,
|
||||
) -> DataFusionResult<Self> {
|
||||
let input_schema = input.schema();
|
||||
Self::check_schema(input_schema, &le_column, &field_column, &ts_column)?;
|
||||
let output_schema = Self::convert_schema(input_schema, &le_column, &field_column)?;
|
||||
Ok(Self {
|
||||
le_column,
|
||||
ts_column,
|
||||
input,
|
||||
field_column,
|
||||
output_schema,
|
||||
})
|
||||
}
|
||||
|
||||
pub const fn name() -> &'static str {
|
||||
"HistogramFold"
|
||||
}
|
||||
|
||||
fn check_schema(
|
||||
input_schema: &DFSchemaRef,
|
||||
le_column: &str,
|
||||
field_column: &str,
|
||||
ts_column: &str,
|
||||
) -> DataFusionResult<()> {
|
||||
let check_column = |col| {
|
||||
if !input_schema.has_column_with_unqualified_name(col) {
|
||||
return Err(DataFusionError::SchemaError(
|
||||
datafusion::common::SchemaError::FieldNotFound {
|
||||
field: Box::new(Column::new(None::<String>, col)),
|
||||
valid_fields: input_schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|f| f.qualified_column())
|
||||
.collect(),
|
||||
},
|
||||
));
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
};
|
||||
|
||||
check_column(le_column)?;
|
||||
check_column(ts_column)?;
|
||||
check_column(field_column)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn to_execution_plan(&self, exec_input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
|
||||
let input_schema = self.input.schema();
|
||||
// safety: those fields are checked in `check_schema()`
|
||||
let le_column_index = input_schema
|
||||
.index_of_column_by_name(None, &self.le_column)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let field_column_index = input_schema
|
||||
.index_of_column_by_name(None, &self.field_column)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
let ts_column_index = input_schema
|
||||
.index_of_column_by_name(None, &self.ts_column)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
Arc::new(HistogramFoldExec {
|
||||
le_column_index,
|
||||
field_column_index,
|
||||
ts_column_index,
|
||||
input: exec_input,
|
||||
output_schema: Arc::new(self.output_schema.as_ref().into()),
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Transform the schema
|
||||
///
|
||||
/// - `le` will become a [ListArray] of [f64]. With each bucket bound parsed
|
||||
/// - `field` will become a [ListArray] of [f64]
|
||||
fn convert_schema(
|
||||
input_schema: &DFSchemaRef,
|
||||
le_column: &str,
|
||||
field_column: &str,
|
||||
) -> DataFusionResult<DFSchemaRef> {
|
||||
let mut fields = input_schema.fields().clone();
|
||||
// safety: those fields are checked in `check_schema()`
|
||||
let le_column_idx = input_schema
|
||||
.index_of_column_by_name(None, le_column)?
|
||||
.unwrap();
|
||||
let field_column_idx = input_schema
|
||||
.index_of_column_by_name(None, field_column)?
|
||||
.unwrap();
|
||||
|
||||
// transform `le`
|
||||
let le_field: Field = fields[le_column_idx].field().as_ref().clone();
|
||||
let le_field = le_field.with_data_type(DataType::Float64);
|
||||
let folded_le_datatype = DataType::List(Arc::new(le_field));
|
||||
let folded_le = DFField::new(
|
||||
fields[le_column_idx].qualifier().cloned(),
|
||||
fields[le_column_idx].name(),
|
||||
folded_le_datatype,
|
||||
false,
|
||||
);
|
||||
|
||||
// transform `field`
|
||||
// to avoid ambiguity, that field will be referenced as `the_field` below.
|
||||
let the_field: Field = fields[field_column_idx].field().as_ref().clone();
|
||||
let folded_field_datatype = DataType::List(Arc::new(the_field));
|
||||
let folded_field = DFField::new(
|
||||
fields[field_column_idx].qualifier().cloned(),
|
||||
fields[field_column_idx].name(),
|
||||
folded_field_datatype,
|
||||
false,
|
||||
);
|
||||
|
||||
fields[le_column_idx] = folded_le;
|
||||
fields[field_column_idx] = folded_field;
|
||||
|
||||
Ok(Arc::new(DFSchema::new_with_metadata(
|
||||
fields,
|
||||
HashMap::new(),
|
||||
)?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct HistogramFoldExec {
|
||||
/// Index for `le` column in the schema of input.
|
||||
le_column_index: usize,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
output_schema: SchemaRef,
|
||||
/// Index for field column in the schema of input.
|
||||
field_column_index: usize,
|
||||
ts_column_index: usize,
|
||||
metric: ExecutionPlanMetricsSet,
|
||||
}
|
||||
|
||||
impl ExecutionPlan for HistogramFoldExec {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.output_schema.clone()
|
||||
}
|
||||
|
||||
fn output_partitioning(&self) -> Partitioning {
|
||||
self.input.output_partitioning()
|
||||
}
|
||||
|
||||
fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
|
||||
self.input.output_ordering()
|
||||
}
|
||||
|
||||
fn required_input_ordering(&self) -> Vec<Option<Vec<PhysicalSortRequirement>>> {
|
||||
let mut cols = self
|
||||
.tag_col_exprs()
|
||||
.into_iter()
|
||||
.map(|expr| PhysicalSortRequirement {
|
||||
expr,
|
||||
options: None,
|
||||
})
|
||||
.collect::<Vec<PhysicalSortRequirement>>();
|
||||
// add le ASC
|
||||
cols.push(PhysicalSortRequirement {
|
||||
expr: Arc::new(PhyColumn::new(
|
||||
self.output_schema.field(self.le_column_index).name(),
|
||||
self.le_column_index,
|
||||
)),
|
||||
options: Some(SortOptions {
|
||||
descending: false, // +INF in the last
|
||||
nulls_first: false, // not nullable
|
||||
}),
|
||||
});
|
||||
// add ts
|
||||
cols.push(PhysicalSortRequirement {
|
||||
expr: Arc::new(PhyColumn::new(
|
||||
self.output_schema.field(self.ts_column_index).name(),
|
||||
self.ts_column_index,
|
||||
)),
|
||||
options: None,
|
||||
});
|
||||
|
||||
vec![Some(cols)]
|
||||
}
|
||||
|
||||
fn required_input_distribution(&self) -> Vec<Distribution> {
|
||||
// partition on all tag columns, i.e., non-le, non-ts and non-field columns
|
||||
vec![Distribution::HashPartitioned(self.tag_col_exprs())]
|
||||
}
|
||||
|
||||
fn maintains_input_order(&self) -> Vec<bool> {
|
||||
vec![true; self.children().len()]
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
|
||||
vec![self.input.clone()]
|
||||
}
|
||||
|
||||
// cannot change schema with this method
|
||||
fn with_new_children(
|
||||
self: Arc<Self>,
|
||||
children: Vec<Arc<dyn ExecutionPlan>>,
|
||||
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
|
||||
assert!(!children.is_empty());
|
||||
Ok(Arc::new(Self {
|
||||
input: children[0].clone(),
|
||||
metric: self.metric.clone(),
|
||||
le_column_index: self.le_column_index,
|
||||
ts_column_index: self.ts_column_index,
|
||||
output_schema: self.output_schema.clone(),
|
||||
field_column_index: self.field_column_index,
|
||||
}))
|
||||
}
|
||||
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
context: Arc<TaskContext>,
|
||||
) -> DataFusionResult<SendableRecordBatchStream> {
|
||||
let baseline_metric = BaselineMetrics::new(&self.metric, partition);
|
||||
|
||||
let batch_size = context.session_config().batch_size();
|
||||
let input = self.input.execute(partition, context)?;
|
||||
let output_schema = self.output_schema.clone();
|
||||
|
||||
let mut normal_indices = (0..output_schema.fields().len()).collect::<HashSet<_>>();
|
||||
normal_indices.remove(&self.le_column_index);
|
||||
normal_indices.remove(&self.field_column_index);
|
||||
Ok(Box::pin(HistogramFoldStream {
|
||||
le_column_index: self.le_column_index,
|
||||
field_column_index: self.field_column_index,
|
||||
normal_indices: normal_indices.into_iter().collect(),
|
||||
bucket_size: None,
|
||||
input_buffer: vec![],
|
||||
input,
|
||||
output_schema,
|
||||
metric: baseline_metric,
|
||||
batch_size,
|
||||
input_buffered_rows: 0,
|
||||
output_buffer: HistogramFoldStream::empty_output_buffer(&self.output_schema)?,
|
||||
output_buffered_rows: 0,
|
||||
}))
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metric.clone_inner())
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
|
||||
Statistics {
|
||||
num_rows: None,
|
||||
total_byte_size: None,
|
||||
column_statistics: None,
|
||||
is_exact: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HistogramFoldExec {
|
||||
/// Return all the [PhysicalExpr] of tag columns in order.
|
||||
///
|
||||
/// Tag columns are all columns except `le`, `field` and `ts` columns.
|
||||
pub fn tag_col_exprs(&self) -> Vec<Arc<dyn PhysicalExpr>> {
|
||||
self.input
|
||||
.schema()
|
||||
.fields()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, field)| {
|
||||
if idx == self.le_column_index
|
||||
|| idx == self.field_column_index
|
||||
|| idx == self.ts_column_index
|
||||
{
|
||||
None
|
||||
} else {
|
||||
Some(Arc::new(PhyColumn::new(field.name(), idx)) as _)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for HistogramFoldExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(
|
||||
f,
|
||||
"HistogramFoldExec: le=@{}, field=@{}",
|
||||
self.le_column_index, self.field_column_index
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HistogramFoldStream {
|
||||
// internal states
|
||||
le_column_index: usize,
|
||||
field_column_index: usize,
|
||||
/// Columns need not folding
|
||||
normal_indices: Vec<usize>,
|
||||
bucket_size: Option<usize>,
|
||||
/// Expected output batch size
|
||||
batch_size: usize,
|
||||
output_schema: SchemaRef,
|
||||
|
||||
// buffers
|
||||
input_buffer: Vec<RecordBatch>,
|
||||
input_buffered_rows: usize,
|
||||
output_buffer: Vec<Box<dyn MutableVector>>,
|
||||
output_buffered_rows: usize,
|
||||
|
||||
// runtime things
|
||||
input: SendableRecordBatchStream,
|
||||
metric: BaselineMetrics,
|
||||
}
|
||||
|
||||
impl RecordBatchStream for HistogramFoldStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.output_schema.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for HistogramFoldStream {
|
||||
type Item = DataFusionResult<RecordBatch>;
|
||||
|
||||
fn poll_next(
|
||||
mut self: std::pin::Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
) -> Poll<Option<Self::Item>> {
|
||||
let poll = loop {
|
||||
match ready!(self.input.poll_next_unpin(cx)) {
|
||||
Some(batch) => {
|
||||
let batch = batch?;
|
||||
let timer = Instant::now();
|
||||
let Some(result) = self.fold_input(batch)? else {
|
||||
self.metric.elapsed_compute().add_elapsed(timer);
|
||||
continue;
|
||||
};
|
||||
self.metric.elapsed_compute().add_elapsed(timer);
|
||||
break Poll::Ready(Some(result));
|
||||
}
|
||||
None => break Poll::Ready(self.take_output_buf()?.map(Ok)),
|
||||
}
|
||||
};
|
||||
self.metric.record_poll(poll)
|
||||
}
|
||||
}
|
||||
|
||||
impl HistogramFoldStream {
|
||||
/// The inner most `Result` is for `poll_next()`
|
||||
pub fn fold_input(
|
||||
&mut self,
|
||||
input: RecordBatch,
|
||||
) -> DataFusionResult<Option<DataFusionResult<RecordBatch>>> {
|
||||
let Some(bucket_num) = self.calculate_bucket_num(&input)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if self.input_buffered_rows + input.num_rows() < bucket_num {
|
||||
// not enough rows to fold
|
||||
self.push_input_buf(input);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
self.fold_buf(bucket_num, input)?;
|
||||
if self.output_buffered_rows >= self.batch_size {
|
||||
return Ok(self.take_output_buf()?.map(Ok));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn empty_output_buffer(
|
||||
schema: &SchemaRef,
|
||||
) -> DataFusionResult<Vec<Box<dyn MutableVector>>> {
|
||||
let mut builders = Vec::with_capacity(schema.fields().len());
|
||||
for field in schema.fields() {
|
||||
let concrete_datatype = ConcreteDataType::try_from(field.data_type()).unwrap();
|
||||
let mutable_vector = concrete_datatype.create_mutable_vector(0);
|
||||
builders.push(mutable_vector);
|
||||
}
|
||||
|
||||
Ok(builders)
|
||||
}
|
||||
|
||||
fn calculate_bucket_num(&mut self, batch: &RecordBatch) -> DataFusionResult<Option<usize>> {
|
||||
if let Some(size) = self.bucket_size {
|
||||
return Ok(Some(size));
|
||||
}
|
||||
|
||||
let inf_pos = self.find_positive_inf(batch)?;
|
||||
if inf_pos == batch.num_rows() {
|
||||
// no positive inf found, append to buffer and wait for next batch
|
||||
self.push_input_buf(batch.clone());
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// else we found the positive inf.
|
||||
// calculate the bucket size
|
||||
let bucket_size = inf_pos + self.input_buffered_rows + 1;
|
||||
Ok(Some(bucket_size))
|
||||
}
|
||||
|
||||
/// Fold record batches from input buffer and put to output buffer
|
||||
fn fold_buf(&mut self, bucket_num: usize, input: RecordBatch) -> DataFusionResult<()> {
|
||||
self.push_input_buf(input);
|
||||
// TODO(ruihang): this concat is avoidable.
|
||||
let batch = concat_batches(&self.input.schema(), self.input_buffer.drain(..).as_ref())?;
|
||||
let mut remaining_rows = self.input_buffered_rows;
|
||||
let mut cursor = 0;
|
||||
|
||||
let gt_schema = GtSchema::try_from(self.input.schema()).unwrap();
|
||||
let batch = GtRecordBatch::try_from_df_record_batch(Arc::new(gt_schema), batch).unwrap();
|
||||
|
||||
while remaining_rows >= bucket_num {
|
||||
// "sample" normal columns
|
||||
for normal_index in &self.normal_indices {
|
||||
let val = batch.column(*normal_index).get(cursor);
|
||||
self.output_buffer[*normal_index].push_value_ref(val.as_value_ref());
|
||||
}
|
||||
// "fold" `le` and field columns
|
||||
let le_array = batch.column(self.le_column_index);
|
||||
let field_array = batch.column(self.field_column_index);
|
||||
let mut le_item = vec![];
|
||||
let mut field_item = vec![];
|
||||
for bias in 0..bucket_num {
|
||||
let le_str_val = le_array.get(cursor + bias);
|
||||
let le_str_val_ref = le_str_val.as_value_ref();
|
||||
let le_str = le_str_val_ref
|
||||
.as_string()
|
||||
.unwrap()
|
||||
.expect("le column should not be nullable");
|
||||
let le = le_str.parse::<f64>().unwrap();
|
||||
let le_val = Value::from(le);
|
||||
le_item.push(le_val);
|
||||
|
||||
let field = field_array.get(cursor + bias);
|
||||
field_item.push(field);
|
||||
}
|
||||
let le_list_val = Value::List(ListValue::new(
|
||||
Some(Box::new(le_item)),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
));
|
||||
let field_list_val = Value::List(ListValue::new(
|
||||
Some(Box::new(field_item)),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
));
|
||||
self.output_buffer[self.le_column_index].push_value_ref(le_list_val.as_value_ref());
|
||||
self.output_buffer[self.field_column_index]
|
||||
.push_value_ref(field_list_val.as_value_ref());
|
||||
|
||||
cursor += bucket_num;
|
||||
remaining_rows -= bucket_num;
|
||||
self.output_buffered_rows += 1;
|
||||
}
|
||||
|
||||
let remaining_input_batch = batch.into_df_record_batch().slice(cursor, remaining_rows);
|
||||
self.input_buffered_rows = remaining_input_batch.num_rows();
|
||||
self.input_buffer.push(remaining_input_batch);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn push_input_buf(&mut self, batch: RecordBatch) {
|
||||
self.input_buffered_rows += batch.num_rows();
|
||||
self.input_buffer.push(batch);
|
||||
}
|
||||
|
||||
fn take_output_buf(&mut self) -> DataFusionResult<Option<RecordBatch>> {
|
||||
if self.output_buffered_rows == 0 {
|
||||
if self.input_buffered_rows != 0 {
|
||||
warn!(
|
||||
"input buffer is not empty, {} rows remaining",
|
||||
self.input_buffered_rows
|
||||
);
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut output_buf = Self::empty_output_buffer(&self.output_schema)?;
|
||||
std::mem::swap(&mut self.output_buffer, &mut output_buf);
|
||||
let mut columns = Vec::with_capacity(output_buf.len());
|
||||
for builder in output_buf.iter_mut() {
|
||||
columns.push(builder.to_vector().to_arrow_array());
|
||||
}
|
||||
|
||||
// overwrite default list datatype to change field name
|
||||
columns[self.le_column_index] = compute::cast(
|
||||
&columns[self.le_column_index],
|
||||
self.output_schema.field(self.le_column_index).data_type(),
|
||||
)?;
|
||||
columns[self.field_column_index] = compute::cast(
|
||||
&columns[self.field_column_index],
|
||||
self.output_schema
|
||||
.field(self.field_column_index)
|
||||
.data_type(),
|
||||
)?;
|
||||
|
||||
self.output_buffered_rows = 0;
|
||||
RecordBatch::try_new(self.output_schema.clone(), columns)
|
||||
.map(Some)
|
||||
.map_err(DataFusionError::ArrowError)
|
||||
}
|
||||
|
||||
/// Find the first `+Inf` which indicates the end of the bucket group
|
||||
///
|
||||
/// If the return value equals to batch's num_rows means the it's not found
|
||||
/// in this batch
|
||||
fn find_positive_inf(&self, batch: &RecordBatch) -> DataFusionResult<usize> {
|
||||
// fuse this function. It should not be called when the
|
||||
// bucket size is already know.
|
||||
if let Some(bucket_size) = self.bucket_size {
|
||||
return Ok(bucket_size);
|
||||
}
|
||||
let string_le_array = batch.column(self.le_column_index);
|
||||
let float_le_array = compute::cast(&string_le_array, &DataType::Float64).map_err(|e| {
|
||||
DataFusionError::Execution(format!(
|
||||
"cannot cast {} array to float64 array: {:?}",
|
||||
string_le_array.data_type(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
let le_as_f64_array = float_le_array
|
||||
.as_primitive_opt::<Float64Type>()
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Execution(format!(
|
||||
"expect a float64 array, but found {}",
|
||||
float_le_array.data_type()
|
||||
))
|
||||
})?;
|
||||
for (i, v) in le_as_f64_array.iter().enumerate() {
|
||||
if let Some(v) = v && v == f64::INFINITY {
|
||||
return Ok(i);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(batch.num_rows())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::arrow::array::Float64Array;
|
||||
use datafusion::arrow::datatypes::Schema;
|
||||
use datafusion::common::ToDFSchema;
|
||||
use datafusion::physical_plan::memory::MemoryExec;
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datatypes::arrow_array::StringArray;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn prepare_test_data() -> MemoryExec {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("host", DataType::Utf8, true),
|
||||
Field::new("le", DataType::Utf8, true),
|
||||
Field::new("val", DataType::Float64, true),
|
||||
]));
|
||||
|
||||
// 12 items
|
||||
let host_column_1 = Arc::new(StringArray::from(vec![
|
||||
"host_1", "host_1", "host_1", "host_1", "host_1", "host_1", "host_1", "host_1",
|
||||
"host_1", "host_1", "host_1", "host_1",
|
||||
])) as _;
|
||||
let le_column_1 = Arc::new(StringArray::from(vec![
|
||||
"0.001", "0.1", "10", "1000", "+Inf", "0.001", "0.1", "10", "1000", "+inf", "0.001",
|
||||
"0.1",
|
||||
])) as _;
|
||||
let val_column_1 = Arc::new(Float64Array::from(vec![
|
||||
0_0.0, 1.0, 1.0, 5.0, 5.0, 0_0.0, 20.0, 60.0, 70.0, 100.0, 0_1.0, 1.0,
|
||||
])) as _;
|
||||
|
||||
// 2 items
|
||||
let host_column_2 = Arc::new(StringArray::from(vec!["host_1", "host_1"])) as _;
|
||||
let le_column_2 = Arc::new(StringArray::from(vec!["10", "1000"])) as _;
|
||||
let val_column_2 = Arc::new(Float64Array::from(vec![1.0, 1.0])) as _;
|
||||
|
||||
// 11 items
|
||||
let host_column_3 = Arc::new(StringArray::from(vec![
|
||||
"host_1", "host_2", "host_2", "host_2", "host_2", "host_2", "host_2", "host_2",
|
||||
"host_2", "host_2", "host_2",
|
||||
])) as _;
|
||||
let le_column_3 = Arc::new(StringArray::from(vec![
|
||||
"+INF", "0.001", "0.1", "10", "1000", "+iNf", "0.001", "0.1", "10", "1000", "+Inf",
|
||||
])) as _;
|
||||
let val_column_3 = Arc::new(Float64Array::from(vec![
|
||||
1.0, 0_0.0, 0.0, 0.0, 0.0, 0.0, 0_0.0, 1.0, 2.0, 3.0, 4.0,
|
||||
])) as _;
|
||||
|
||||
let data_1 = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![host_column_1, le_column_1, val_column_1],
|
||||
)
|
||||
.unwrap();
|
||||
let data_2 = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![host_column_2, le_column_2, val_column_2],
|
||||
)
|
||||
.unwrap();
|
||||
let data_3 = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![host_column_3, le_column_3, val_column_3],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
MemoryExec::try_new(&[vec![data_1, data_2, data_3]], schema, None).unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn fold_overall() {
|
||||
let memory_exec = Arc::new(prepare_test_data());
|
||||
let output_schema = Arc::new(
|
||||
(*HistogramFold::convert_schema(
|
||||
&Arc::new(memory_exec.schema().to_dfschema().unwrap()),
|
||||
"le",
|
||||
"val",
|
||||
)
|
||||
.unwrap()
|
||||
.as_ref())
|
||||
.clone()
|
||||
.into(),
|
||||
);
|
||||
let fold_exec = Arc::new(HistogramFoldExec {
|
||||
le_column_index: 1,
|
||||
field_column_index: 2,
|
||||
ts_column_index: 9999, // not exist but doesn't matter
|
||||
input: memory_exec,
|
||||
output_schema,
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
});
|
||||
|
||||
let session_context = SessionContext::default();
|
||||
let result = datafusion::physical_plan::collect(fold_exec, session_context.task_ctx())
|
||||
.await
|
||||
.unwrap();
|
||||
let result_literal = datatypes::arrow::util::pretty::pretty_format_batches(&result)
|
||||
.unwrap()
|
||||
.to_string();
|
||||
|
||||
let expected = String::from(
|
||||
"+--------+---------------------------------+--------------------------------+
|
||||
| host | le | val |
|
||||
+--------+---------------------------------+--------------------------------+
|
||||
| host_1 | [0.001, 0.1, 10.0, 1000.0, inf] | [0.0, 1.0, 1.0, 5.0, 5.0] |
|
||||
| host_1 | [0.001, 0.1, 10.0, 1000.0, inf] | [0.0, 20.0, 60.0, 70.0, 100.0] |
|
||||
| host_1 | [0.001, 0.1, 10.0, 1000.0, inf] | [1.0, 1.0, 1.0, 1.0, 1.0] |
|
||||
| host_2 | [0.001, 0.1, 10.0, 1000.0, inf] | [0.0, 0.0, 0.0, 0.0, 0.0] |
|
||||
| host_2 | [0.001, 0.1, 10.0, 1000.0, inf] | [0.0, 1.0, 2.0, 3.0, 4.0] |
|
||||
+--------+---------------------------------+--------------------------------+",
|
||||
);
|
||||
assert_eq!(result_literal, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn confirm_schema() {
|
||||
let input_schema = Schema::new(vec![
|
||||
Field::new("host", DataType::Utf8, true),
|
||||
Field::new("le", DataType::Utf8, true),
|
||||
Field::new("val", DataType::Float64, true),
|
||||
])
|
||||
.to_dfschema_ref()
|
||||
.unwrap();
|
||||
let expected_output_schema = Schema::new(vec![
|
||||
Field::new("host", DataType::Utf8, true),
|
||||
Field::new(
|
||||
"le",
|
||||
DataType::List(Arc::new(Field::new("le", DataType::Float64, true))),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
"val",
|
||||
DataType::List(Arc::new(Field::new("val", DataType::Float64, true))),
|
||||
false,
|
||||
),
|
||||
])
|
||||
.to_dfschema_ref()
|
||||
.unwrap();
|
||||
|
||||
let actual = HistogramFold::convert_schema(&input_schema, "le", "val").unwrap();
|
||||
assert_eq!(actual, expected_output_schema)
|
||||
}
|
||||
}
|
||||
@@ -61,6 +61,8 @@ use crate::functions::{
|
||||
|
||||
/// `time()` function in PromQL.
|
||||
const SPECIAL_TIME_FUNCTION: &str = "time";
|
||||
/// `histogram_quantile` function in PromQL
|
||||
const SPECIAL_HISTOGRAM_QUANTILE: &str = "histogram_quantile";
|
||||
|
||||
const DEFAULT_TIME_INDEX_COLUMN: &str = "time";
|
||||
|
||||
@@ -440,6 +442,10 @@ impl PromPlanner {
|
||||
}));
|
||||
}
|
||||
|
||||
if func.name == SPECIAL_HISTOGRAM_QUANTILE {
|
||||
todo!()
|
||||
}
|
||||
|
||||
let args = self.create_function_args(&args.args)?;
|
||||
let input = self
|
||||
.prom_expr_to_plan(args.input.with_context(|| ExpectExprSnafu {
|
||||
|
||||
@@ -316,9 +316,13 @@ impl ErrorExt for Error {
|
||||
ParseSql { source, .. } => source.status_code(),
|
||||
CreateRecordBatch { source, .. } => source.status_code(),
|
||||
QueryExecution { source, .. } | QueryPlan { source, .. } => source.status_code(),
|
||||
DataFusion { .. } | MissingTimestampColumn { .. } | RoutePartition { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
DataFusion { error, .. } => match error {
|
||||
DataFusionError::Internal(_) => StatusCode::Internal,
|
||||
DataFusionError::NotImplemented(_) => StatusCode::Unsupported,
|
||||
DataFusionError::Plan(_) => StatusCode::PlanQuery,
|
||||
_ => StatusCode::EngineExecuteQuery,
|
||||
},
|
||||
MissingTimestampColumn { .. } | RoutePartition { .. } => StatusCode::EngineExecuteQuery,
|
||||
Sql { source, .. } => source.status_code(),
|
||||
PlanSql { .. } => StatusCode::PlanQuery,
|
||||
ConvertSqlType { source, .. } | ConvertSqlValue { source, .. } => source.status_code(),
|
||||
|
||||
@@ -392,7 +392,6 @@ impl ErrorExt for Error {
|
||||
Internal { .. }
|
||||
| InternalIo { .. }
|
||||
| TokioIo { .. }
|
||||
| CollectRecordbatch { .. }
|
||||
| StartHttp { .. }
|
||||
| StartGrpc { .. }
|
||||
| AlreadyStarted { .. }
|
||||
@@ -403,6 +402,8 @@ impl ErrorExt for Error {
|
||||
| GrpcReflectionService { .. }
|
||||
| BuildHttpResponse { .. } => StatusCode::Internal,
|
||||
|
||||
CollectRecordbatch { .. } => StatusCode::EngineExecuteQuery,
|
||||
|
||||
InsertScript { source, .. }
|
||||
| ExecuteScript { source, .. }
|
||||
| ExecuteQuery { source, .. }
|
||||
|
||||
@@ -660,6 +660,7 @@ impl HttpServer {
|
||||
fn route_otlp<S>(&self, otlp_handler: OpenTelemetryProtocolHandlerRef) -> Router<S> {
|
||||
Router::new()
|
||||
.route("/v1/metrics", routing::post(otlp::metrics))
|
||||
.route("/v1/traces", routing::post(otlp::traces))
|
||||
.with_state(otlp_handler)
|
||||
}
|
||||
|
||||
|
||||
@@ -84,17 +84,19 @@ pub async fn put(
|
||||
let summary = params.contains_key("summary");
|
||||
let details = params.contains_key("details");
|
||||
|
||||
let data_points = parse_data_points(body).await?;
|
||||
let data_point_requests = parse_data_points(body).await?;
|
||||
let data_points = data_point_requests
|
||||
.iter()
|
||||
.map(|point| point.clone().into())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let response = if !summary && !details {
|
||||
for data_point in data_points.into_iter() {
|
||||
if let Err(e) = opentsdb_handler.exec(&data_point.into(), ctx.clone()).await {
|
||||
// Not debugging purpose, failed fast.
|
||||
return error::InternalSnafu {
|
||||
err_msg: e.to_string(),
|
||||
}
|
||||
.fail();
|
||||
if let Err(e) = opentsdb_handler.exec(data_points, ctx.clone()).await {
|
||||
// Not debugging purpose, failed fast.
|
||||
return error::InternalSnafu {
|
||||
err_msg: e.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(HttpStatusCode::NO_CONTENT, Json(OpentsdbPutResponse::Empty))
|
||||
} else {
|
||||
@@ -108,15 +110,11 @@ pub async fn put(
|
||||
},
|
||||
};
|
||||
|
||||
for data_point in data_points.into_iter() {
|
||||
let result = opentsdb_handler
|
||||
.exec(&data_point.clone().into(), ctx.clone())
|
||||
.await;
|
||||
for (data_point, request) in data_points.into_iter().zip(data_point_requests) {
|
||||
let result = opentsdb_handler.exec(vec![data_point], ctx.clone()).await;
|
||||
match result {
|
||||
Ok(()) => response.on_success(),
|
||||
Err(e) => {
|
||||
response.on_failed(data_point, e);
|
||||
}
|
||||
Ok(affected_rows) => response.on_success(affected_rows),
|
||||
Err(e) => response.on_failed(request, e),
|
||||
}
|
||||
}
|
||||
(
|
||||
@@ -151,8 +149,8 @@ pub struct OpentsdbDebuggingResponse {
|
||||
}
|
||||
|
||||
impl OpentsdbDebuggingResponse {
|
||||
fn on_success(&mut self) {
|
||||
self.success += 1;
|
||||
fn on_success(&mut self, affected_rows: usize) {
|
||||
self.success += affected_rows as i32;
|
||||
}
|
||||
|
||||
fn on_failed(&mut self, datapoint: DataPointRequest, error: impl ErrorExt) {
|
||||
|
||||
@@ -21,6 +21,9 @@ use hyper::Body;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::{
|
||||
ExportMetricsServiceRequest, ExportMetricsServiceResponse,
|
||||
};
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::{
|
||||
ExportTraceServiceRequest, ExportTraceServiceResponse,
|
||||
};
|
||||
use prost::Message;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::prelude::*;
|
||||
@@ -33,16 +36,19 @@ pub async fn metrics(
|
||||
State(handler): State<OpenTelemetryProtocolHandlerRef>,
|
||||
Extension(query_ctx): Extension<QueryContextRef>,
|
||||
RawBody(body): RawBody,
|
||||
) -> Result<OtlpResponse> {
|
||||
) -> Result<OtlpMetricsResponse> {
|
||||
let _timer = timer!(
|
||||
crate::metrics::METRIC_HTTP_OPENTELEMETRY_ELAPSED,
|
||||
crate::metrics::METRIC_HTTP_OPENTELEMETRY_METRICS_ELAPSED,
|
||||
&[(crate::metrics::METRIC_DB_LABEL, query_ctx.get_db_string())]
|
||||
);
|
||||
let request = parse_body(body).await?;
|
||||
handler.metrics(request, query_ctx).await.map(OtlpResponse)
|
||||
let request = parse_metrics_body(body).await?;
|
||||
handler
|
||||
.metrics(request, query_ctx)
|
||||
.await
|
||||
.map(OtlpMetricsResponse)
|
||||
}
|
||||
|
||||
async fn parse_body(body: Body) -> Result<ExportMetricsServiceRequest> {
|
||||
async fn parse_metrics_body(body: Body) -> Result<ExportMetricsServiceRequest> {
|
||||
hyper::body::to_bytes(body)
|
||||
.await
|
||||
.context(error::HyperSnafu)
|
||||
@@ -51,9 +57,47 @@ async fn parse_body(body: Body) -> Result<ExportMetricsServiceRequest> {
|
||||
})
|
||||
}
|
||||
|
||||
pub struct OtlpResponse(ExportMetricsServiceResponse);
|
||||
pub struct OtlpMetricsResponse(ExportMetricsServiceResponse);
|
||||
|
||||
impl IntoResponse for OtlpResponse {
|
||||
impl IntoResponse for OtlpMetricsResponse {
|
||||
fn into_response(self) -> axum::response::Response {
|
||||
(
|
||||
[(header::CONTENT_TYPE, "application/x-protobuf")],
|
||||
self.0.encode_to_vec(),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
}
|
||||
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn traces(
|
||||
State(handler): State<OpenTelemetryProtocolHandlerRef>,
|
||||
Extension(query_ctx): Extension<QueryContextRef>,
|
||||
RawBody(body): RawBody,
|
||||
) -> Result<OtlpTracesResponse> {
|
||||
let _timer = timer!(
|
||||
crate::metrics::METRIC_HTTP_OPENTELEMETRY_TRACES_ELAPSED,
|
||||
&[(crate::metrics::METRIC_DB_LABEL, query_ctx.get_db_string())]
|
||||
);
|
||||
let request = parse_traces_body(body).await?;
|
||||
handler
|
||||
.traces(request, query_ctx)
|
||||
.await
|
||||
.map(OtlpTracesResponse)
|
||||
}
|
||||
|
||||
async fn parse_traces_body(body: Body) -> Result<ExportTraceServiceRequest> {
|
||||
hyper::body::to_bytes(body)
|
||||
.await
|
||||
.context(error::HyperSnafu)
|
||||
.and_then(|buf| {
|
||||
ExportTraceServiceRequest::decode(&buf[..]).context(error::DecodeOtlpRequestSnafu)
|
||||
})
|
||||
}
|
||||
|
||||
pub struct OtlpTracesResponse(ExportTraceServiceResponse);
|
||||
|
||||
impl IntoResponse for OtlpTracesResponse {
|
||||
fn into_response(self) -> axum::response::Response {
|
||||
(
|
||||
[(header::CONTENT_TYPE, "application/x-protobuf")],
|
||||
|
||||
@@ -37,7 +37,10 @@ pub(crate) const METRIC_HTTP_INFLUXDB_WRITE_ELAPSED: &str = "servers.http_influx
|
||||
pub(crate) const METRIC_HTTP_PROM_STORE_WRITE_ELAPSED: &str =
|
||||
"servers.http_prometheus_write_elapsed";
|
||||
pub(crate) const METRIC_HTTP_PROM_STORE_READ_ELAPSED: &str = "servers.http_prometheus_read_elapsed";
|
||||
pub(crate) const METRIC_HTTP_OPENTELEMETRY_ELAPSED: &str = "servers.http_otlp_elapsed";
|
||||
pub(crate) const METRIC_HTTP_OPENTELEMETRY_METRICS_ELAPSED: &str =
|
||||
"servers.http_otlp_metrics_elapsed";
|
||||
pub(crate) const METRIC_HTTP_OPENTELEMETRY_TRACES_ELAPSED: &str =
|
||||
"servers.http_otlp_traces_elapsed";
|
||||
pub(crate) const METRIC_TCP_OPENTSDB_LINE_WRITE_ELAPSED: &str =
|
||||
"servers.opentsdb_line_write_elapsed";
|
||||
pub(crate) const METRIC_HTTP_PROMQL_INSTANT_QUERY_ELAPSED: &str =
|
||||
|
||||
@@ -20,16 +20,20 @@ use std::future::Future;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::RowInsertRequests;
|
||||
use async_trait::async_trait;
|
||||
use common_runtime::Runtime;
|
||||
use common_telemetry::logging::error;
|
||||
use futures::StreamExt;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use self::codec::DataPoint;
|
||||
use crate::error::Result;
|
||||
use crate::opentsdb::connection::Connection;
|
||||
use crate::opentsdb::handler::Handler;
|
||||
use crate::prom_store::{FIELD_COLUMN_NAME, TIMESTAMP_COLUMN_NAME};
|
||||
use crate::query_handler::OpentsdbProtocolHandlerRef;
|
||||
use crate::row_writer::{self, MultiTableData};
|
||||
use crate::server::{AbortableStream, BaseTcpServer, Server};
|
||||
use crate::shutdown::Shutdown;
|
||||
|
||||
@@ -126,3 +130,38 @@ impl Server for OpentsdbServer {
|
||||
OPENTSDB_SERVER
|
||||
}
|
||||
}
|
||||
|
||||
pub fn data_point_to_grpc_row_insert_requests(
|
||||
data_points: Vec<DataPoint>,
|
||||
) -> Result<(RowInsertRequests, usize)> {
|
||||
let mut multi_table_data = MultiTableData::new();
|
||||
|
||||
for mut data_point in data_points {
|
||||
let tags: Vec<(String, String)> = std::mem::take(data_point.tags_mut());
|
||||
let table_name = data_point.metric();
|
||||
let value = data_point.value();
|
||||
let timestamp = data_point.ts_millis();
|
||||
// length of tags + 2 extra columns for greptime_timestamp and the value
|
||||
let num_columns = tags.len() + 2;
|
||||
|
||||
let table_data = multi_table_data.get_or_default_table_data(table_name, num_columns, 1);
|
||||
let mut one_row = table_data.alloc_one_row();
|
||||
|
||||
// tags
|
||||
row_writer::write_tags(table_data, tags.into_iter(), &mut one_row)?;
|
||||
|
||||
// value
|
||||
row_writer::write_f64(table_data, FIELD_COLUMN_NAME, value, &mut one_row)?;
|
||||
// timestamp
|
||||
row_writer::write_ts_millis(
|
||||
table_data,
|
||||
TIMESTAMP_COLUMN_NAME,
|
||||
Some(timestamp),
|
||||
&mut one_row,
|
||||
)?;
|
||||
|
||||
table_data.add_row(one_row);
|
||||
}
|
||||
|
||||
Ok(multi_table_data.into_row_insert_requests())
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::error::{self, Result};
|
||||
pub const OPENTSDB_TIMESTAMP_COLUMN_NAME: &str = "greptime_timestamp";
|
||||
pub const OPENTSDB_FIELD_COLUMN_NAME: &str = "greptime_value";
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DataPoint {
|
||||
metric: String,
|
||||
ts_millis: i64,
|
||||
@@ -115,6 +115,10 @@ impl DataPoint {
|
||||
&self.tags
|
||||
}
|
||||
|
||||
pub fn tags_mut(&mut self) -> &mut Vec<(String, String)> {
|
||||
&mut self.tags
|
||||
}
|
||||
|
||||
pub fn ts_millis(&self) -> i64 {
|
||||
self.ts_millis
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ impl<S: AsyncWrite + AsyncRead + Unpin> Handler<S> {
|
||||
match DataPoint::try_create(&line) {
|
||||
Ok(data_point) => {
|
||||
let _timer = timer!(crate::metrics::METRIC_TCP_OPENTSDB_LINE_WRITE_ELAPSED);
|
||||
let result = self.query_handler.exec(&data_point, ctx.clone()).await;
|
||||
let result = self.query_handler.exec(vec![data_point], ctx.clone()).await;
|
||||
if let Err(e) = result {
|
||||
self.connection.write_line(e.output_msg()).await?;
|
||||
}
|
||||
@@ -128,8 +128,8 @@ mod tests {
|
||||
|
||||
#[async_trait]
|
||||
impl OpentsdbProtocolHandler for DummyQueryHandler {
|
||||
async fn exec(&self, data_point: &DataPoint, _ctx: QueryContextRef) -> Result<()> {
|
||||
let metric = data_point.metric();
|
||||
async fn exec(&self, data_points: Vec<DataPoint>, _ctx: QueryContextRef) -> Result<usize> {
|
||||
let metric = data_points.first().unwrap().metric();
|
||||
if metric == "should_failed" {
|
||||
return error::InternalSnafu {
|
||||
err_msg: "expected",
|
||||
@@ -137,7 +137,7 @@ mod tests {
|
||||
.fail();
|
||||
}
|
||||
self.tx.send(metric.to_string()).await.unwrap();
|
||||
Ok(())
|
||||
Ok(data_points.len())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,7 +169,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
let resp = client.read_line().await.unwrap();
|
||||
assert_eq!(resp, Some("Internal error: expected".to_string()));
|
||||
assert_eq!(resp, Some("Internal error: 1003".to_string()));
|
||||
|
||||
client.write_line("get".to_string()).await.unwrap();
|
||||
let resp = client.read_line().await.unwrap();
|
||||
|
||||
@@ -12,649 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::{RowInsertRequests, Value};
|
||||
use common_grpc::writer::Precision;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::{any_value, KeyValue};
|
||||
use opentelemetry_proto::tonic::metrics::v1::{metric, number_data_point, *};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::row_writer::{self, MultiTableData, TableData};
|
||||
pub mod metrics;
|
||||
pub mod plugin;
|
||||
pub mod trace;
|
||||
|
||||
const GREPTIME_TIMESTAMP: &str = "greptime_timestamp";
|
||||
const GREPTIME_VALUE: &str = "greptime_value";
|
||||
const GREPTIME_COUNT: &str = "greptime_count";
|
||||
/// the default column count for table writer
|
||||
const APPROXIMATE_COLUMN_COUNT: usize = 8;
|
||||
|
||||
/// Normalize otlp instrumentation, metric and attribute names
|
||||
///
|
||||
/// <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/api.md#instrument-name-syntax>
|
||||
/// - since the name are case-insensitive, we transform them to lowercase for
|
||||
/// better sql usability
|
||||
/// - replace `.` and `-` with `_`
|
||||
fn normalize_otlp_name(name: &str) -> String {
|
||||
name.to_lowercase().replace(|c| c == '.' || c == '-', "_")
|
||||
}
|
||||
|
||||
/// Convert OpenTelemetry metrics to GreptimeDB insert requests
|
||||
///
|
||||
/// See
|
||||
/// <https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/metrics/v1/metrics.proto#L162>
|
||||
/// for data structure of OTLP metrics.
|
||||
///
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
pub fn to_grpc_insert_requests(
|
||||
request: ExportMetricsServiceRequest,
|
||||
) -> Result<(RowInsertRequests, usize)> {
|
||||
let mut table_writer = MultiTableData::default();
|
||||
|
||||
for resource in &request.resource_metrics {
|
||||
let resource_attrs = resource.resource.as_ref().map(|r| &r.attributes);
|
||||
for scope in &resource.scope_metrics {
|
||||
let scope_attrs = scope.scope.as_ref().map(|s| &s.attributes);
|
||||
for metric in &scope.metrics {
|
||||
encode_metrics(&mut table_writer, metric, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(table_writer.into_row_insert_requests())
|
||||
}
|
||||
|
||||
fn encode_metrics(
|
||||
table_writer: &mut MultiTableData,
|
||||
metric: &Metric,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let name = &metric.name;
|
||||
// note that we don't store description or unit, we might want to deal with
|
||||
// these fields in the future.
|
||||
if let Some(data) = &metric.data {
|
||||
match data {
|
||||
metric::Data::Gauge(gauge) => {
|
||||
encode_gauge(table_writer, name, gauge, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
metric::Data::Sum(sum) => {
|
||||
encode_sum(table_writer, name, sum, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
metric::Data::Summary(summary) => {
|
||||
encode_summary(table_writer, name, summary, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
metric::Data::Histogram(hist) => {
|
||||
encode_histogram(table_writer, name, hist, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
// TODO(sunng87) leave ExponentialHistogram for next release
|
||||
metric::Data::ExponentialHistogram(_hist) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_attributes(
|
||||
writer: &mut TableData,
|
||||
row: &mut Vec<Value>,
|
||||
attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
if let Some(attrs) = attrs {
|
||||
let table_tags = attrs.iter().filter_map(|attr| {
|
||||
if let Some(val) = attr.value.as_ref().and_then(|v| v.value.as_ref()) {
|
||||
let key = normalize_otlp_name(&attr.key);
|
||||
match val {
|
||||
any_value::Value::StringValue(s) => Some((key, s.to_string())),
|
||||
any_value::Value::IntValue(v) => Some((key, v.to_string())),
|
||||
any_value::Value::DoubleValue(v) => Some((key, v.to_string())),
|
||||
_ => None, // TODO(sunng87): allow different type of values
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
row_writer::write_tags(writer, table_tags, row)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_timestamp(table: &mut TableData, row: &mut Vec<Value>, time_nano: i64) -> Result<()> {
|
||||
row_writer::write_ts_precision(
|
||||
table,
|
||||
GREPTIME_TIMESTAMP,
|
||||
Some(time_nano),
|
||||
Precision::Nanosecond,
|
||||
row,
|
||||
)
|
||||
}
|
||||
|
||||
fn write_data_point_value(
|
||||
table: &mut TableData,
|
||||
row: &mut Vec<Value>,
|
||||
field: &str,
|
||||
value: &Option<number_data_point::Value>,
|
||||
) -> Result<()> {
|
||||
match value {
|
||||
Some(number_data_point::Value::AsInt(val)) => {
|
||||
// we coerce all values to f64
|
||||
row_writer::write_f64(table, field, *val as f64, row)?;
|
||||
}
|
||||
Some(number_data_point::Value::AsDouble(val)) => {
|
||||
row_writer::write_f64(table, field, *val, row)?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_tags_and_timestamp(
|
||||
table: &mut TableData,
|
||||
row: &mut Vec<Value>,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
data_point_attrs: Option<&Vec<KeyValue>>,
|
||||
timestamp_nanos: i64,
|
||||
) -> Result<()> {
|
||||
write_attributes(table, row, resource_attrs)?;
|
||||
write_attributes(table, row, scope_attrs)?;
|
||||
write_attributes(table, row, data_point_attrs)?;
|
||||
|
||||
write_timestamp(table, row, timestamp_nanos)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// encode this gauge metric
|
||||
///
|
||||
/// note that there can be multiple data points in the request, it's going to be
|
||||
/// stored as multiple rows
|
||||
fn encode_gauge(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
gauge: &Gauge,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let table = table_writer.get_or_default_table_data(
|
||||
&normalize_otlp_name(name),
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
gauge.data_points.len(),
|
||||
);
|
||||
|
||||
for data_point in &gauge.data_points {
|
||||
let mut row = table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
table,
|
||||
&mut row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
write_data_point_value(table, &mut row, GREPTIME_VALUE, &data_point.value)?;
|
||||
table.add_row(row);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// encode this sum metric
|
||||
///
|
||||
/// `aggregation_temporality` and `monotonic` are ignored for now
|
||||
fn encode_sum(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
sum: &Sum,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let table = table_writer.get_or_default_table_data(
|
||||
&normalize_otlp_name(name),
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
sum.data_points.len(),
|
||||
);
|
||||
|
||||
for data_point in &sum.data_points {
|
||||
let mut row = table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
table,
|
||||
&mut row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
write_data_point_value(table, &mut row, GREPTIME_VALUE, &data_point.value)?;
|
||||
table.add_row(row);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const HISTOGRAM_LE_COLUMN: &str = "le";
|
||||
|
||||
/// Encode histogram data. This function returns 3 insert requests for 3 tables.
|
||||
///
|
||||
/// The implementation has been following Prometheus histogram table format:
|
||||
///
|
||||
/// - A `%metric%_bucket` table including `greptime_le` tag that stores bucket upper
|
||||
/// limit, and `greptime_value` for bucket count
|
||||
/// - A `%metric%_sum` table storing sum of samples
|
||||
/// - A `%metric%_count` table storing count of samples.
|
||||
///
|
||||
/// By its Prometheus compatibility, we hope to be able to use prometheus
|
||||
/// quantile functions on this table.
|
||||
fn encode_histogram(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
hist: &Histogram,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let normalized_name = normalize_otlp_name(name);
|
||||
|
||||
let bucket_table_name = format!("{}_bucket", normalized_name);
|
||||
let sum_table_name = format!("{}_sum", normalized_name);
|
||||
let count_table_name = format!("{}_count", normalized_name);
|
||||
|
||||
let data_points_len = hist.data_points.len();
|
||||
// Note that the row and columns number here is approximate
|
||||
let mut bucket_table = TableData::new(APPROXIMATE_COLUMN_COUNT, data_points_len * 3);
|
||||
let mut sum_table = TableData::new(APPROXIMATE_COLUMN_COUNT, data_points_len);
|
||||
let mut count_table = TableData::new(APPROXIMATE_COLUMN_COUNT, data_points_len);
|
||||
|
||||
for data_point in &hist.data_points {
|
||||
let mut accumulated_count = 0;
|
||||
for (idx, count) in data_point.bucket_counts.iter().enumerate() {
|
||||
let mut bucket_row = bucket_table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
&mut bucket_table,
|
||||
&mut bucket_row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
if let Some(upper_bounds) = data_point.explicit_bounds.get(idx) {
|
||||
row_writer::write_tag(
|
||||
&mut bucket_table,
|
||||
HISTOGRAM_LE_COLUMN,
|
||||
upper_bounds,
|
||||
&mut bucket_row,
|
||||
)?;
|
||||
} else if idx == data_point.explicit_bounds.len() {
|
||||
// The last bucket
|
||||
row_writer::write_tag(
|
||||
&mut bucket_table,
|
||||
HISTOGRAM_LE_COLUMN,
|
||||
f64::INFINITY,
|
||||
&mut bucket_row,
|
||||
)?;
|
||||
}
|
||||
|
||||
accumulated_count += count;
|
||||
row_writer::write_f64(
|
||||
&mut bucket_table,
|
||||
GREPTIME_VALUE,
|
||||
accumulated_count as f64,
|
||||
&mut bucket_row,
|
||||
)?;
|
||||
|
||||
bucket_table.add_row(bucket_row);
|
||||
}
|
||||
|
||||
if let Some(sum) = data_point.sum {
|
||||
let mut sum_row = sum_table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
&mut sum_table,
|
||||
&mut sum_row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
row_writer::write_f64(&mut sum_table, GREPTIME_VALUE, sum, &mut sum_row)?;
|
||||
sum_table.add_row(sum_row);
|
||||
}
|
||||
|
||||
let mut count_row = count_table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
&mut count_table,
|
||||
&mut count_row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
row_writer::write_f64(
|
||||
&mut count_table,
|
||||
GREPTIME_VALUE,
|
||||
data_point.count as f64,
|
||||
&mut count_row,
|
||||
)?;
|
||||
count_table.add_row(count_row);
|
||||
}
|
||||
|
||||
table_writer.add_table_data(bucket_table_name, bucket_table);
|
||||
table_writer.add_table_data(sum_table_name, sum_table);
|
||||
table_writer.add_table_data(count_table_name, count_table);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn encode_exponential_histogram(_name: &str, _hist: &ExponentialHistogram) -> Result<()> {
|
||||
// TODO(sunng87): implement this using a prometheus compatible way
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn encode_summary(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
summary: &Summary,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let table = table_writer.get_or_default_table_data(
|
||||
&normalize_otlp_name(name),
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
summary.data_points.len(),
|
||||
);
|
||||
|
||||
for data_point in &summary.data_points {
|
||||
let mut row = table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
table,
|
||||
&mut row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
for quantile in &data_point.quantile_values {
|
||||
row_writer::write_f64(
|
||||
table,
|
||||
&format!("greptime_p{:02}", quantile.quantile * 100f64),
|
||||
quantile.value,
|
||||
&mut row,
|
||||
)?;
|
||||
}
|
||||
|
||||
row_writer::write_f64(table, GREPTIME_COUNT, data_point.count as f64, &mut row)?;
|
||||
table.add_row(row);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use opentelemetry_proto::tonic::common::v1::any_value::Value as Val;
|
||||
use opentelemetry_proto::tonic::common::v1::{AnyValue, KeyValue};
|
||||
use opentelemetry_proto::tonic::metrics::v1::number_data_point::Value;
|
||||
use opentelemetry_proto::tonic::metrics::v1::summary_data_point::ValueAtQuantile;
|
||||
use opentelemetry_proto::tonic::metrics::v1::{HistogramDataPoint, NumberDataPoint};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_normalize_otlp_name() {
|
||||
assert_eq!(normalize_otlp_name("jvm.memory.free"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("jvm-memory-free"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("jvm_memory_free"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("JVM_MEMORY_FREE"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("JVM_memory_FREE"), "jvm_memory_free");
|
||||
}
|
||||
|
||||
fn keyvalue(key: &str, value: &str) -> KeyValue {
|
||||
KeyValue {
|
||||
key: key.into(),
|
||||
value: Some(AnyValue {
|
||||
value: Some(Val::StringValue(value.into())),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_gauge() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testsevrer")],
|
||||
time_unix_nano: 100,
|
||||
value: Some(Value::AsInt(100)),
|
||||
..Default::default()
|
||||
},
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 105,
|
||||
value: Some(Value::AsInt(105)),
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
let gauge = Gauge { data_points };
|
||||
encode_gauge(
|
||||
&mut tables,
|
||||
"datamon",
|
||||
&gauge,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = tables.get_or_default_table_data("datamon", 0, 0);
|
||||
assert_eq!(table.num_rows(), 2);
|
||||
assert_eq!(table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_sum() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 100,
|
||||
value: Some(Value::AsInt(100)),
|
||||
..Default::default()
|
||||
},
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 105,
|
||||
value: Some(Value::AsInt(0)),
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
let sum = Sum {
|
||||
data_points,
|
||||
..Default::default()
|
||||
};
|
||||
encode_sum(
|
||||
&mut tables,
|
||||
"datamon",
|
||||
&sum,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = tables.get_or_default_table_data("datamon", 0, 0);
|
||||
assert_eq!(table.num_rows(), 2);
|
||||
assert_eq!(table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_summary() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![SummaryDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 100,
|
||||
count: 25,
|
||||
sum: 5400.0,
|
||||
quantile_values: vec![
|
||||
ValueAtQuantile {
|
||||
quantile: 0.90,
|
||||
value: 1000.0,
|
||||
},
|
||||
ValueAtQuantile {
|
||||
quantile: 0.95,
|
||||
value: 3030.0,
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
}];
|
||||
let summary = Summary { data_points };
|
||||
encode_summary(
|
||||
&mut tables,
|
||||
"datamon",
|
||||
&summary,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = tables.get_or_default_table_data("datamon", 0, 0);
|
||||
assert_eq!(table.num_rows(), 1);
|
||||
assert_eq!(table.num_columns(), 7);
|
||||
assert_eq!(
|
||||
table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_p90",
|
||||
"greptime_p95",
|
||||
"greptime_count"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_histogram() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![HistogramDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 100,
|
||||
start_time_unix_nano: 23,
|
||||
count: 25,
|
||||
sum: Some(100.),
|
||||
max: Some(200.),
|
||||
min: Some(0.03),
|
||||
bucket_counts: vec![2, 4, 6, 9, 4],
|
||||
explicit_bounds: vec![0.1, 1., 10., 100.],
|
||||
..Default::default()
|
||||
}];
|
||||
|
||||
let histogram = Histogram {
|
||||
data_points,
|
||||
aggregation_temporality: AggregationTemporality::Delta.into(),
|
||||
};
|
||||
encode_histogram(
|
||||
&mut tables,
|
||||
"histo",
|
||||
&histogram,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(3, tables.num_tables());
|
||||
|
||||
// bucket table
|
||||
let bucket_table = tables.get_or_default_table_data("histo_bucket", 0, 0);
|
||||
assert_eq!(bucket_table.num_rows(), 5);
|
||||
assert_eq!(bucket_table.num_columns(), 6);
|
||||
assert_eq!(
|
||||
bucket_table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"le",
|
||||
"greptime_value",
|
||||
]
|
||||
);
|
||||
|
||||
let sum_table = tables.get_or_default_table_data("histo_sum", 0, 0);
|
||||
assert_eq!(sum_table.num_rows(), 1);
|
||||
assert_eq!(sum_table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
sum_table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value",
|
||||
]
|
||||
);
|
||||
|
||||
let count_table = tables.get_or_default_table_data("histo_count", 0, 0);
|
||||
assert_eq!(count_table.num_rows(), 1);
|
||||
assert_eq!(count_table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
count_table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value",
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
658
src/servers/src/otlp/metrics.rs
Normal file
658
src/servers/src/otlp/metrics.rs
Normal file
@@ -0,0 +1,658 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::{RowInsertRequests, Value};
|
||||
use common_grpc::writer::Precision;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::{any_value, KeyValue};
|
||||
use opentelemetry_proto::tonic::metrics::v1::{metric, number_data_point, *};
|
||||
|
||||
use super::{GREPTIME_COUNT, GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use crate::error::Result;
|
||||
use crate::row_writer::{self, MultiTableData, TableData};
|
||||
|
||||
/// the default column count for table writer
|
||||
const APPROXIMATE_COLUMN_COUNT: usize = 8;
|
||||
|
||||
/// Normalize otlp instrumentation, metric and attribute names
|
||||
///
|
||||
/// <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/api.md#instrument-name-syntax>
|
||||
/// - since the name are case-insensitive, we transform them to lowercase for
|
||||
/// better sql usability
|
||||
/// - replace `.` and `-` with `_`
|
||||
fn normalize_otlp_name(name: &str) -> String {
|
||||
name.to_lowercase().replace(|c| c == '.' || c == '-', "_")
|
||||
}
|
||||
|
||||
/// Convert OpenTelemetry metrics to GreptimeDB insert requests
|
||||
///
|
||||
/// See
|
||||
/// <https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/metrics/v1/metrics.proto>
|
||||
/// for data structure of OTLP metrics.
|
||||
///
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
pub fn to_grpc_insert_requests(
|
||||
request: ExportMetricsServiceRequest,
|
||||
) -> Result<(RowInsertRequests, usize)> {
|
||||
let mut table_writer = MultiTableData::default();
|
||||
|
||||
for resource in &request.resource_metrics {
|
||||
let resource_attrs = resource.resource.as_ref().map(|r| &r.attributes);
|
||||
for scope in &resource.scope_metrics {
|
||||
let scope_attrs = scope.scope.as_ref().map(|s| &s.attributes);
|
||||
for metric in &scope.metrics {
|
||||
encode_metrics(&mut table_writer, metric, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(table_writer.into_row_insert_requests())
|
||||
}
|
||||
|
||||
fn encode_metrics(
|
||||
table_writer: &mut MultiTableData,
|
||||
metric: &Metric,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let name = &metric.name;
|
||||
// note that we don't store description or unit, we might want to deal with
|
||||
// these fields in the future.
|
||||
if let Some(data) = &metric.data {
|
||||
match data {
|
||||
metric::Data::Gauge(gauge) => {
|
||||
encode_gauge(table_writer, name, gauge, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
metric::Data::Sum(sum) => {
|
||||
encode_sum(table_writer, name, sum, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
metric::Data::Summary(summary) => {
|
||||
encode_summary(table_writer, name, summary, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
metric::Data::Histogram(hist) => {
|
||||
encode_histogram(table_writer, name, hist, resource_attrs, scope_attrs)?;
|
||||
}
|
||||
// TODO(sunng87) leave ExponentialHistogram for next release
|
||||
metric::Data::ExponentialHistogram(_hist) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_attributes(
|
||||
writer: &mut TableData,
|
||||
row: &mut Vec<Value>,
|
||||
attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
if let Some(attrs) = attrs {
|
||||
let table_tags = attrs.iter().filter_map(|attr| {
|
||||
if let Some(val) = attr.value.as_ref().and_then(|v| v.value.as_ref()) {
|
||||
let key = normalize_otlp_name(&attr.key);
|
||||
match val {
|
||||
any_value::Value::StringValue(s) => Some((key, s.to_string())),
|
||||
any_value::Value::IntValue(v) => Some((key, v.to_string())),
|
||||
any_value::Value::DoubleValue(v) => Some((key, v.to_string())),
|
||||
_ => None, // TODO(sunng87): allow different type of values
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
row_writer::write_tags(writer, table_tags, row)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_timestamp(table: &mut TableData, row: &mut Vec<Value>, time_nano: i64) -> Result<()> {
|
||||
row_writer::write_ts_precision(
|
||||
table,
|
||||
GREPTIME_TIMESTAMP,
|
||||
Some(time_nano),
|
||||
Precision::Nanosecond,
|
||||
row,
|
||||
)
|
||||
}
|
||||
|
||||
fn write_data_point_value(
|
||||
table: &mut TableData,
|
||||
row: &mut Vec<Value>,
|
||||
field: &str,
|
||||
value: &Option<number_data_point::Value>,
|
||||
) -> Result<()> {
|
||||
match value {
|
||||
Some(number_data_point::Value::AsInt(val)) => {
|
||||
// we coerce all values to f64
|
||||
row_writer::write_f64(table, field, *val as f64, row)?;
|
||||
}
|
||||
Some(number_data_point::Value::AsDouble(val)) => {
|
||||
row_writer::write_f64(table, field, *val, row)?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_tags_and_timestamp(
|
||||
table: &mut TableData,
|
||||
row: &mut Vec<Value>,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
data_point_attrs: Option<&Vec<KeyValue>>,
|
||||
timestamp_nanos: i64,
|
||||
) -> Result<()> {
|
||||
write_attributes(table, row, resource_attrs)?;
|
||||
write_attributes(table, row, scope_attrs)?;
|
||||
write_attributes(table, row, data_point_attrs)?;
|
||||
|
||||
write_timestamp(table, row, timestamp_nanos)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// encode this gauge metric
|
||||
///
|
||||
/// note that there can be multiple data points in the request, it's going to be
|
||||
/// stored as multiple rows
|
||||
fn encode_gauge(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
gauge: &Gauge,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let table = table_writer.get_or_default_table_data(
|
||||
&normalize_otlp_name(name),
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
gauge.data_points.len(),
|
||||
);
|
||||
|
||||
for data_point in &gauge.data_points {
|
||||
let mut row = table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
table,
|
||||
&mut row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
write_data_point_value(table, &mut row, GREPTIME_VALUE, &data_point.value)?;
|
||||
table.add_row(row);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// encode this sum metric
|
||||
///
|
||||
/// `aggregation_temporality` and `monotonic` are ignored for now
|
||||
fn encode_sum(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
sum: &Sum,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let table = table_writer.get_or_default_table_data(
|
||||
&normalize_otlp_name(name),
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
sum.data_points.len(),
|
||||
);
|
||||
|
||||
for data_point in &sum.data_points {
|
||||
let mut row = table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
table,
|
||||
&mut row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
write_data_point_value(table, &mut row, GREPTIME_VALUE, &data_point.value)?;
|
||||
table.add_row(row);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const HISTOGRAM_LE_COLUMN: &str = "le";
|
||||
|
||||
/// Encode histogram data. This function returns 3 insert requests for 3 tables.
|
||||
///
|
||||
/// The implementation has been following Prometheus histogram table format:
|
||||
///
|
||||
/// - A `%metric%_bucket` table including `greptime_le` tag that stores bucket upper
|
||||
/// limit, and `greptime_value` for bucket count
|
||||
/// - A `%metric%_sum` table storing sum of samples
|
||||
/// - A `%metric%_count` table storing count of samples.
|
||||
///
|
||||
/// By its Prometheus compatibility, we hope to be able to use prometheus
|
||||
/// quantile functions on this table.
|
||||
fn encode_histogram(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
hist: &Histogram,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let normalized_name = normalize_otlp_name(name);
|
||||
|
||||
let bucket_table_name = format!("{}_bucket", normalized_name);
|
||||
let sum_table_name = format!("{}_sum", normalized_name);
|
||||
let count_table_name = format!("{}_count", normalized_name);
|
||||
|
||||
let data_points_len = hist.data_points.len();
|
||||
// Note that the row and columns number here is approximate
|
||||
let mut bucket_table = TableData::new(APPROXIMATE_COLUMN_COUNT, data_points_len * 3);
|
||||
let mut sum_table = TableData::new(APPROXIMATE_COLUMN_COUNT, data_points_len);
|
||||
let mut count_table = TableData::new(APPROXIMATE_COLUMN_COUNT, data_points_len);
|
||||
|
||||
for data_point in &hist.data_points {
|
||||
let mut accumulated_count = 0;
|
||||
for (idx, count) in data_point.bucket_counts.iter().enumerate() {
|
||||
let mut bucket_row = bucket_table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
&mut bucket_table,
|
||||
&mut bucket_row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
if let Some(upper_bounds) = data_point.explicit_bounds.get(idx) {
|
||||
row_writer::write_tag(
|
||||
&mut bucket_table,
|
||||
HISTOGRAM_LE_COLUMN,
|
||||
upper_bounds,
|
||||
&mut bucket_row,
|
||||
)?;
|
||||
} else if idx == data_point.explicit_bounds.len() {
|
||||
// The last bucket
|
||||
row_writer::write_tag(
|
||||
&mut bucket_table,
|
||||
HISTOGRAM_LE_COLUMN,
|
||||
f64::INFINITY,
|
||||
&mut bucket_row,
|
||||
)?;
|
||||
}
|
||||
|
||||
accumulated_count += count;
|
||||
row_writer::write_f64(
|
||||
&mut bucket_table,
|
||||
GREPTIME_VALUE,
|
||||
accumulated_count as f64,
|
||||
&mut bucket_row,
|
||||
)?;
|
||||
|
||||
bucket_table.add_row(bucket_row);
|
||||
}
|
||||
|
||||
if let Some(sum) = data_point.sum {
|
||||
let mut sum_row = sum_table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
&mut sum_table,
|
||||
&mut sum_row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
row_writer::write_f64(&mut sum_table, GREPTIME_VALUE, sum, &mut sum_row)?;
|
||||
sum_table.add_row(sum_row);
|
||||
}
|
||||
|
||||
let mut count_row = count_table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
&mut count_table,
|
||||
&mut count_row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
row_writer::write_f64(
|
||||
&mut count_table,
|
||||
GREPTIME_VALUE,
|
||||
data_point.count as f64,
|
||||
&mut count_row,
|
||||
)?;
|
||||
count_table.add_row(count_row);
|
||||
}
|
||||
|
||||
table_writer.add_table_data(bucket_table_name, bucket_table);
|
||||
table_writer.add_table_data(sum_table_name, sum_table);
|
||||
table_writer.add_table_data(count_table_name, count_table);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn encode_exponential_histogram(_name: &str, _hist: &ExponentialHistogram) -> Result<()> {
|
||||
// TODO(sunng87): implement this using a prometheus compatible way
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn encode_summary(
|
||||
table_writer: &mut MultiTableData,
|
||||
name: &str,
|
||||
summary: &Summary,
|
||||
resource_attrs: Option<&Vec<KeyValue>>,
|
||||
scope_attrs: Option<&Vec<KeyValue>>,
|
||||
) -> Result<()> {
|
||||
let table = table_writer.get_or_default_table_data(
|
||||
&normalize_otlp_name(name),
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
summary.data_points.len(),
|
||||
);
|
||||
|
||||
for data_point in &summary.data_points {
|
||||
let mut row = table.alloc_one_row();
|
||||
write_tags_and_timestamp(
|
||||
table,
|
||||
&mut row,
|
||||
resource_attrs,
|
||||
scope_attrs,
|
||||
Some(data_point.attributes.as_ref()),
|
||||
data_point.time_unix_nano as i64,
|
||||
)?;
|
||||
|
||||
for quantile in &data_point.quantile_values {
|
||||
row_writer::write_f64(
|
||||
table,
|
||||
&format!("greptime_p{:02}", quantile.quantile * 100f64),
|
||||
quantile.value,
|
||||
&mut row,
|
||||
)?;
|
||||
}
|
||||
|
||||
row_writer::write_f64(table, GREPTIME_COUNT, data_point.count as f64, &mut row)?;
|
||||
table.add_row(row);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use opentelemetry_proto::tonic::common::v1::any_value::Value as Val;
|
||||
use opentelemetry_proto::tonic::common::v1::{AnyValue, KeyValue};
|
||||
use opentelemetry_proto::tonic::metrics::v1::number_data_point::Value;
|
||||
use opentelemetry_proto::tonic::metrics::v1::summary_data_point::ValueAtQuantile;
|
||||
use opentelemetry_proto::tonic::metrics::v1::{HistogramDataPoint, NumberDataPoint};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_normalize_otlp_name() {
|
||||
assert_eq!(normalize_otlp_name("jvm.memory.free"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("jvm-memory-free"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("jvm_memory_free"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("JVM_MEMORY_FREE"), "jvm_memory_free");
|
||||
assert_eq!(normalize_otlp_name("JVM_memory_FREE"), "jvm_memory_free");
|
||||
}
|
||||
|
||||
fn keyvalue(key: &str, value: &str) -> KeyValue {
|
||||
KeyValue {
|
||||
key: key.into(),
|
||||
value: Some(AnyValue {
|
||||
value: Some(Val::StringValue(value.into())),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_gauge() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testsevrer")],
|
||||
time_unix_nano: 100,
|
||||
value: Some(Value::AsInt(100)),
|
||||
..Default::default()
|
||||
},
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 105,
|
||||
value: Some(Value::AsInt(105)),
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
let gauge = Gauge { data_points };
|
||||
encode_gauge(
|
||||
&mut tables,
|
||||
"datamon",
|
||||
&gauge,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = tables.get_or_default_table_data("datamon", 0, 0);
|
||||
assert_eq!(table.num_rows(), 2);
|
||||
assert_eq!(table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_sum() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 100,
|
||||
value: Some(Value::AsInt(100)),
|
||||
..Default::default()
|
||||
},
|
||||
NumberDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 105,
|
||||
value: Some(Value::AsInt(0)),
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
let sum = Sum {
|
||||
data_points,
|
||||
..Default::default()
|
||||
};
|
||||
encode_sum(
|
||||
&mut tables,
|
||||
"datamon",
|
||||
&sum,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = tables.get_or_default_table_data("datamon", 0, 0);
|
||||
assert_eq!(table.num_rows(), 2);
|
||||
assert_eq!(table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_summary() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![SummaryDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 100,
|
||||
count: 25,
|
||||
sum: 5400.0,
|
||||
quantile_values: vec![
|
||||
ValueAtQuantile {
|
||||
quantile: 0.90,
|
||||
value: 1000.0,
|
||||
},
|
||||
ValueAtQuantile {
|
||||
quantile: 0.95,
|
||||
value: 3030.0,
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
}];
|
||||
let summary = Summary { data_points };
|
||||
encode_summary(
|
||||
&mut tables,
|
||||
"datamon",
|
||||
&summary,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let table = tables.get_or_default_table_data("datamon", 0, 0);
|
||||
assert_eq!(table.num_rows(), 1);
|
||||
assert_eq!(table.num_columns(), 7);
|
||||
assert_eq!(
|
||||
table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_p90",
|
||||
"greptime_p95",
|
||||
"greptime_count"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_histogram() {
|
||||
let mut tables = MultiTableData::default();
|
||||
|
||||
let data_points = vec![HistogramDataPoint {
|
||||
attributes: vec![keyvalue("host", "testserver")],
|
||||
time_unix_nano: 100,
|
||||
start_time_unix_nano: 23,
|
||||
count: 25,
|
||||
sum: Some(100.),
|
||||
max: Some(200.),
|
||||
min: Some(0.03),
|
||||
bucket_counts: vec![2, 4, 6, 9, 4],
|
||||
explicit_bounds: vec![0.1, 1., 10., 100.],
|
||||
..Default::default()
|
||||
}];
|
||||
|
||||
let histogram = Histogram {
|
||||
data_points,
|
||||
aggregation_temporality: AggregationTemporality::Delta.into(),
|
||||
};
|
||||
encode_histogram(
|
||||
&mut tables,
|
||||
"histo",
|
||||
&histogram,
|
||||
Some(&vec![keyvalue("resource", "app")]),
|
||||
Some(&vec![keyvalue("scope", "otel")]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(3, tables.num_tables());
|
||||
|
||||
// bucket table
|
||||
let bucket_table = tables.get_or_default_table_data("histo_bucket", 0, 0);
|
||||
assert_eq!(bucket_table.num_rows(), 5);
|
||||
assert_eq!(bucket_table.num_columns(), 6);
|
||||
assert_eq!(
|
||||
bucket_table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"le",
|
||||
"greptime_value",
|
||||
]
|
||||
);
|
||||
|
||||
let sum_table = tables.get_or_default_table_data("histo_sum", 0, 0);
|
||||
assert_eq!(sum_table.num_rows(), 1);
|
||||
assert_eq!(sum_table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
sum_table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value",
|
||||
]
|
||||
);
|
||||
|
||||
let count_table = tables.get_or_default_table_data("histo_count", 0, 0);
|
||||
assert_eq!(count_table.num_rows(), 1);
|
||||
assert_eq!(count_table.num_columns(), 5);
|
||||
assert_eq!(
|
||||
count_table
|
||||
.columns()
|
||||
.iter()
|
||||
.map(|c| &c.column_name)
|
||||
.collect::<Vec<&String>>(),
|
||||
vec![
|
||||
"resource",
|
||||
"scope",
|
||||
"host",
|
||||
"greptime_timestamp",
|
||||
"greptime_value",
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
28
src/servers/src/otlp/plugin.rs
Normal file
28
src/servers/src/otlp/plugin.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
|
||||
use super::trace::TraceSpans;
|
||||
|
||||
/// Transformer helps to transform ExportTraceServiceRequest based on logic, like:
|
||||
/// - uplift some fields from Attributes (Map type) to column
|
||||
pub trait TraceParser: Send + Sync {
|
||||
fn parse(&self, request: ExportTraceServiceRequest) -> TraceSpans;
|
||||
fn table_name(&self) -> String;
|
||||
}
|
||||
|
||||
pub type TraceParserRef = Arc<dyn TraceParser>;
|
||||
411
src/servers/src/otlp/trace.rs
Normal file
411
src/servers/src/otlp/trace.rs
Normal file
@@ -0,0 +1,411 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, RowInsertRequests};
|
||||
use common_grpc::writer::Precision;
|
||||
use common_time::time::Time;
|
||||
use itertools::Itertools;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::any_value::Value as OtlpValue;
|
||||
use opentelemetry_proto::tonic::common::v1::{
|
||||
AnyValue, ArrayValue, InstrumentationScope, KeyValue, KeyValueList,
|
||||
};
|
||||
use opentelemetry_proto::tonic::trace::v1::span::{Event, Link};
|
||||
use opentelemetry_proto::tonic::trace::v1::{Span, Status};
|
||||
use serde_json::json;
|
||||
|
||||
use super::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use crate::error::Result;
|
||||
use crate::row_writer::{self, MultiTableData, TableData};
|
||||
|
||||
const APPROXIMATE_COLUMN_COUNT: usize = 24;
|
||||
pub const TRACE_TABLE_NAME: &str = "traces_preview_v01";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
pub struct TraceSpan {
|
||||
// the following are tags
|
||||
pub trace_id: String,
|
||||
pub span_id: String,
|
||||
pub parent_span_id: String,
|
||||
|
||||
// the following are fields
|
||||
pub resource_attributes: String, // TODO(yuanbohan): Map in the future
|
||||
pub scope_name: String,
|
||||
pub scope_version: String,
|
||||
pub scope_attributes: String, // TODO(yuanbohan): Map in the future
|
||||
pub trace_state: String,
|
||||
pub span_name: String,
|
||||
pub span_kind: String,
|
||||
pub span_status_code: String,
|
||||
pub span_status_message: String,
|
||||
pub span_attributes: String, // TODO(yuanbohan): Map in the future
|
||||
pub span_events: String, // TODO(yuanbohan): List in the future
|
||||
pub span_links: String, // TODO(yuanbohan): List in the future
|
||||
pub start_in_nanosecond: u64, // this is also the Timestamp Index
|
||||
pub end_in_nanosecond: u64,
|
||||
|
||||
pub uplifted_fields: Vec<(String, ColumnDataType, ValueData)>,
|
||||
}
|
||||
|
||||
pub type TraceSpans = Vec<TraceSpan>;
|
||||
|
||||
/// Convert SpanTraces to GreptimeDB row insert requests.
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
pub fn to_grpc_insert_requests(
|
||||
table_name: String,
|
||||
spans: TraceSpans,
|
||||
) -> Result<(RowInsertRequests, usize)> {
|
||||
let mut multi_table_writer = MultiTableData::default();
|
||||
let one_table_writer = multi_table_writer.get_or_default_table_data(
|
||||
table_name,
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
spans.len(),
|
||||
);
|
||||
|
||||
for span in spans {
|
||||
write_span_to_row(one_table_writer, span)?;
|
||||
}
|
||||
|
||||
Ok(multi_table_writer.into_row_insert_requests())
|
||||
}
|
||||
|
||||
pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()> {
|
||||
let mut row = writer.alloc_one_row();
|
||||
{
|
||||
// tags
|
||||
let iter = vec![
|
||||
("trace_id", span.trace_id),
|
||||
("span_id", span.span_id),
|
||||
("parent_span_id", span.parent_span_id),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(col, val)| (col.to_string(), val));
|
||||
row_writer::write_tags(writer, iter, &mut row)?;
|
||||
}
|
||||
{
|
||||
// fields
|
||||
let str_fields_iter = vec![
|
||||
("resource_attributes", span.resource_attributes),
|
||||
("scope_name", span.scope_name),
|
||||
("scope_version", span.scope_version),
|
||||
("scope_attributes", span.scope_attributes),
|
||||
("trace_state", span.trace_state),
|
||||
("span_name", span.span_name),
|
||||
("span_kind", span.span_kind),
|
||||
("span_status_code", span.span_status_code),
|
||||
("span_status_message", span.span_status_message),
|
||||
("span_attributes", span.span_attributes),
|
||||
("span_events", span.span_events),
|
||||
("span_links", span.span_links),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(col, val)| {
|
||||
(
|
||||
col.into(),
|
||||
ColumnDataType::String,
|
||||
ValueData::StringValue(val),
|
||||
)
|
||||
});
|
||||
|
||||
let time_fields_iter = vec![
|
||||
("start", span.start_in_nanosecond),
|
||||
("end", span.end_in_nanosecond),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(col, val)| {
|
||||
(
|
||||
col.into(),
|
||||
ColumnDataType::TimestampNanosecond,
|
||||
ValueData::TimestampNanosecondValue(val as i64),
|
||||
)
|
||||
});
|
||||
|
||||
row_writer::write_fields(writer, str_fields_iter, &mut row)?;
|
||||
row_writer::write_fields(writer, time_fields_iter, &mut row)?;
|
||||
row_writer::write_fields(writer, span.uplifted_fields.into_iter(), &mut row)?;
|
||||
}
|
||||
|
||||
row_writer::write_f64(
|
||||
writer,
|
||||
GREPTIME_VALUE,
|
||||
(span.end_in_nanosecond - span.start_in_nanosecond) as f64 / 1_000_000.0, // duration in millisecond
|
||||
&mut row,
|
||||
)?;
|
||||
row_writer::write_ts_precision(
|
||||
writer,
|
||||
GREPTIME_TIMESTAMP,
|
||||
Some(span.start_in_nanosecond as i64),
|
||||
Precision::Nanosecond,
|
||||
&mut row,
|
||||
)?;
|
||||
|
||||
writer.add_row(row);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn parse_span(
|
||||
resource_attrs: &[KeyValue],
|
||||
scope: &InstrumentationScope,
|
||||
span: Span,
|
||||
) -> TraceSpan {
|
||||
let (span_status_code, span_status_message) = status_to_string(&span.status);
|
||||
let span_kind = span.kind().as_str_name().into();
|
||||
TraceSpan {
|
||||
trace_id: bytes_to_hex_string(&span.trace_id),
|
||||
span_id: bytes_to_hex_string(&span.span_id),
|
||||
parent_span_id: bytes_to_hex_string(&span.parent_span_id),
|
||||
|
||||
resource_attributes: vec_kv_to_string(resource_attrs),
|
||||
trace_state: span.trace_state,
|
||||
|
||||
scope_name: scope.name.clone(),
|
||||
scope_version: scope.version.clone(),
|
||||
scope_attributes: vec_kv_to_string(&scope.attributes),
|
||||
|
||||
span_name: span.name,
|
||||
span_kind,
|
||||
span_status_code,
|
||||
span_status_message,
|
||||
span_attributes: vec_kv_to_string(&span.attributes),
|
||||
span_events: events_to_string(&span.events),
|
||||
span_links: links_to_string(&span.links),
|
||||
|
||||
start_in_nanosecond: span.start_time_unix_nano,
|
||||
end_in_nanosecond: span.end_time_unix_nano,
|
||||
|
||||
uplifted_fields: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert OpenTelemetry traces to SpanTraces
|
||||
///
|
||||
/// See
|
||||
/// <https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto>
|
||||
/// for data structure of OTLP traces.
|
||||
pub fn parse(request: ExportTraceServiceRequest) -> TraceSpans {
|
||||
let mut spans = vec![];
|
||||
for resource_spans in request.resource_spans {
|
||||
let resource_attrs = resource_spans
|
||||
.resource
|
||||
.map(|r| r.attributes)
|
||||
.unwrap_or_default();
|
||||
for scope_spans in resource_spans.scope_spans {
|
||||
let scope = scope_spans.scope.unwrap_or_default();
|
||||
for span in scope_spans.spans {
|
||||
spans.push(parse_span(&resource_attrs, &scope, span));
|
||||
}
|
||||
}
|
||||
}
|
||||
spans
|
||||
}
|
||||
|
||||
pub fn bytes_to_hex_string(bs: &[u8]) -> String {
|
||||
bs.iter().map(|b| format!("{:02x}", b)).join("")
|
||||
}
|
||||
|
||||
pub fn arr_vals_to_string(arr: &ArrayValue) -> String {
|
||||
let vs: Vec<String> = arr
|
||||
.values
|
||||
.iter()
|
||||
.filter_map(|val| any_value_to_string(val.clone()))
|
||||
.collect();
|
||||
|
||||
serde_json::to_string(&vs).unwrap_or_else(|_| "[]".into())
|
||||
}
|
||||
|
||||
pub fn vec_kv_to_string(vec: &[KeyValue]) -> String {
|
||||
let vs: HashMap<String, String> = vec
|
||||
.iter()
|
||||
.map(|kv| {
|
||||
let val = kv
|
||||
.value
|
||||
.clone()
|
||||
.and_then(any_value_to_string)
|
||||
.unwrap_or_default();
|
||||
(kv.key.clone(), val)
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::to_string(&vs).unwrap_or_else(|_| "{}".into())
|
||||
}
|
||||
|
||||
pub fn kvlist_to_string(kvlist: &KeyValueList) -> String {
|
||||
vec_kv_to_string(&kvlist.values)
|
||||
}
|
||||
|
||||
pub fn any_value_to_string(val: AnyValue) -> Option<String> {
|
||||
val.value.map(|value| match value {
|
||||
OtlpValue::StringValue(s) => s,
|
||||
OtlpValue::BoolValue(b) => b.to_string(),
|
||||
OtlpValue::IntValue(i) => i.to_string(),
|
||||
OtlpValue::DoubleValue(d) => d.to_string(),
|
||||
OtlpValue::ArrayValue(arr) => arr_vals_to_string(&arr),
|
||||
OtlpValue::KvlistValue(kv) => kvlist_to_string(&kv),
|
||||
OtlpValue::BytesValue(bs) => bytes_to_hex_string(&bs),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn event_to_string(event: &Event) -> String {
|
||||
json!({
|
||||
"name": event.name,
|
||||
"time": Time::new_nanosecond(event.time_unix_nano as i64).to_iso8601_string(),
|
||||
"attrs": vec_kv_to_string(&event.attributes),
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub fn events_to_string(events: &[Event]) -> String {
|
||||
let v: Vec<String> = events.iter().map(event_to_string).collect();
|
||||
serde_json::to_string(&v).unwrap_or_else(|_| "[]".into())
|
||||
}
|
||||
|
||||
pub fn link_to_string(link: &Link) -> String {
|
||||
json!({
|
||||
"trace_id": link.trace_id,
|
||||
"span_id": link.span_id,
|
||||
"trace_state": link.trace_state,
|
||||
"attributes": vec_kv_to_string(&link.attributes),
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub fn links_to_string(links: &[Link]) -> String {
|
||||
let v: Vec<String> = links.iter().map(link_to_string).collect();
|
||||
serde_json::to_string(&v).unwrap_or_else(|_| "[]".into())
|
||||
}
|
||||
|
||||
pub fn status_to_string(status: &Option<Status>) -> (String, String) {
|
||||
match status {
|
||||
Some(status) => (status.code().as_str_name().into(), status.message.clone()),
|
||||
None => ("".into(), "".into()),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_time::time::Time;
|
||||
use opentelemetry_proto::tonic::common::v1::{
|
||||
any_value, AnyValue, ArrayValue, KeyValue, KeyValueList,
|
||||
};
|
||||
use opentelemetry_proto::tonic::trace::v1::span::Event;
|
||||
use opentelemetry_proto::tonic::trace::v1::Status;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::otlp::trace::{
|
||||
arr_vals_to_string, bytes_to_hex_string, event_to_string, kvlist_to_string,
|
||||
status_to_string, vec_kv_to_string,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_bytes_to_hex_string() {
|
||||
assert_eq!(
|
||||
"24fe79948641b110a29bc27859307e8d",
|
||||
bytes_to_hex_string(&[
|
||||
36, 254, 121, 148, 134, 65, 177, 16, 162, 155, 194, 120, 89, 48, 126, 141,
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"baffeedd7b8debc0",
|
||||
bytes_to_hex_string(&[186, 255, 238, 221, 123, 141, 235, 192,])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_arr_vals_to_string() {
|
||||
assert_eq!("[]", arr_vals_to_string(&ArrayValue { values: vec![] }));
|
||||
|
||||
let arr = ArrayValue {
|
||||
values: vec![
|
||||
AnyValue {
|
||||
value: Some(any_value::Value::StringValue("string_value".into())),
|
||||
},
|
||||
AnyValue {
|
||||
value: Some(any_value::Value::BoolValue(true)),
|
||||
},
|
||||
AnyValue {
|
||||
value: Some(any_value::Value::IntValue(1)),
|
||||
},
|
||||
AnyValue {
|
||||
value: Some(any_value::Value::DoubleValue(1.2)),
|
||||
},
|
||||
],
|
||||
};
|
||||
let expect = json!(["string_value", "true", "1", "1.2"]).to_string();
|
||||
assert_eq!(expect, arr_vals_to_string(&arr));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_kv_list_to_string() {
|
||||
let kvlist = KeyValueList {
|
||||
values: vec![KeyValue {
|
||||
key: "str_key".into(),
|
||||
value: Some(AnyValue {
|
||||
value: Some(any_value::Value::StringValue("val1".into())),
|
||||
}),
|
||||
}],
|
||||
};
|
||||
let expect = json!({
|
||||
"str_key": "val1",
|
||||
})
|
||||
.to_string();
|
||||
assert_eq!(expect, kvlist_to_string(&kvlist))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_event_to_string() {
|
||||
let attributes = vec![KeyValue {
|
||||
key: "str_key".into(),
|
||||
value: Some(AnyValue {
|
||||
value: Some(any_value::Value::StringValue("val1".into())),
|
||||
}),
|
||||
}];
|
||||
let event = Event {
|
||||
time_unix_nano: 1697620662450128000_u64,
|
||||
name: "event_name".into(),
|
||||
attributes,
|
||||
dropped_attributes_count: 0,
|
||||
};
|
||||
let event_string = event_to_string(&event);
|
||||
let expect = json!({
|
||||
"name": event.name,
|
||||
"time": Time::new_nanosecond(event.time_unix_nano as i64).to_iso8601_string(),
|
||||
"attrs": vec_kv_to_string(&event.attributes),
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
expect,
|
||||
serde_json::from_str::<serde_json::value::Value>(event_string.as_str()).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_status_to_string() {
|
||||
let message = String::from("status message");
|
||||
let status = Status {
|
||||
code: 1,
|
||||
message: message.clone(),
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
("STATUS_CODE_OK".into(), message),
|
||||
status_to_string(&Some(status)),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -34,6 +34,9 @@ use common_query::Output;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::{
|
||||
ExportMetricsServiceRequest, ExportMetricsServiceResponse,
|
||||
};
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::{
|
||||
ExportTraceServiceRequest, ExportTraceServiceResponse,
|
||||
};
|
||||
use session::context::QueryContextRef;
|
||||
|
||||
use crate::error::Result;
|
||||
@@ -74,7 +77,7 @@ pub trait InfluxdbLineProtocolHandler {
|
||||
pub trait OpentsdbProtocolHandler {
|
||||
/// A successful request will not return a response.
|
||||
/// Only on error will the socket return a line of data.
|
||||
async fn exec(&self, data_point: &DataPoint, ctx: QueryContextRef) -> Result<()>;
|
||||
async fn exec(&self, data_points: Vec<DataPoint>, ctx: QueryContextRef) -> Result<usize>;
|
||||
}
|
||||
|
||||
pub struct PromStoreResponse {
|
||||
@@ -101,4 +104,11 @@ pub trait OpenTelemetryProtocolHandler {
|
||||
request: ExportMetricsServiceRequest,
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<ExportMetricsServiceResponse>;
|
||||
|
||||
/// Handling opentelemetry traces request
|
||||
async fn traces(
|
||||
&self,
|
||||
request: ExportTraceServiceRequest,
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<ExportTraceServiceResponse>;
|
||||
}
|
||||
|
||||
@@ -51,7 +51,8 @@ impl GrpcQueryHandler for DummyInstance {
|
||||
|
||||
#[async_trait]
|
||||
impl OpentsdbProtocolHandler for DummyInstance {
|
||||
async fn exec(&self, data_point: &DataPoint, _ctx: QueryContextRef) -> Result<()> {
|
||||
async fn exec(&self, data_points: Vec<DataPoint>, _ctx: QueryContextRef) -> Result<usize> {
|
||||
let data_point = data_points.first().unwrap();
|
||||
if data_point.metric() == "should_failed" {
|
||||
return error::InternalSnafu {
|
||||
err_msg: "expected",
|
||||
@@ -59,7 +60,7 @@ impl OpentsdbProtocolHandler for DummyInstance {
|
||||
.fail();
|
||||
}
|
||||
let _ = self.tx.send(data_point.metric().to_string()).await;
|
||||
Ok(())
|
||||
Ok(data_points.len())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,19 +164,13 @@ async fn test_opentsdb_put() {
|
||||
.send()
|
||||
.await;
|
||||
assert_eq!(result.status(), 500);
|
||||
assert_eq!(
|
||||
result.text().await,
|
||||
"{\"error\":\"Internal error: Internal error: expected\"}"
|
||||
);
|
||||
assert_eq!(result.text().await, "{\"error\":\"Internal error: 1003\"}");
|
||||
|
||||
let mut metrics = vec![];
|
||||
while let Ok(s) = rx.try_recv() {
|
||||
metrics.push(s);
|
||||
}
|
||||
assert_eq!(
|
||||
metrics,
|
||||
vec!["m1".to_string(), "m2".to_string(), "m3".to_string()]
|
||||
);
|
||||
assert_eq!(metrics, vec!["m1".to_string(), "m2".to_string()]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -208,7 +203,7 @@ async fn test_opentsdb_debug_put() {
|
||||
.send()
|
||||
.await;
|
||||
assert_eq!(result.status(), 200);
|
||||
assert_eq!(result.text().await, "{\"success\":0,\"failed\":1,\"errors\":[{\"datapoint\":{\"metric\":\"should_failed\",\"timestamp\":1000,\"value\":1.0,\"tags\":{\"host\":\"web01\"}},\"error\":\"Internal error: expected\"}]}");
|
||||
assert_eq!(result.text().await, "{\"success\":0,\"failed\":1,\"errors\":[{\"datapoint\":{\"metric\":\"should_failed\",\"timestamp\":1000,\"value\":1.0,\"tags\":{\"host\":\"web01\"}},\"error\":\"Internal error: 1003\"}]}");
|
||||
|
||||
// multiple data point summary debug put
|
||||
let result = client
|
||||
@@ -233,7 +228,7 @@ async fn test_opentsdb_debug_put() {
|
||||
.send()
|
||||
.await;
|
||||
assert_eq!(result.status(), 200);
|
||||
assert_eq!(result.text().await, "{\"success\":1,\"failed\":1,\"errors\":[{\"datapoint\":{\"metric\":\"should_failed\",\"timestamp\":1000,\"value\":1.0,\"tags\":{\"host\":\"web01\"}},\"error\":\"Internal error: expected\"}]}");
|
||||
assert_eq!(result.text().await, "{\"success\":1,\"failed\":1,\"errors\":[{\"datapoint\":{\"metric\":\"should_failed\",\"timestamp\":1000,\"value\":1.0,\"tags\":{\"host\":\"web01\"}},\"error\":\"Internal error: 1003\"}]}");
|
||||
|
||||
let mut metrics = vec![];
|
||||
while let Ok(s) = rx.try_recv() {
|
||||
|
||||
@@ -37,8 +37,8 @@ struct DummyOpentsdbInstance {
|
||||
|
||||
#[async_trait]
|
||||
impl OpentsdbProtocolHandler for DummyOpentsdbInstance {
|
||||
async fn exec(&self, data_point: &DataPoint, _ctx: QueryContextRef) -> Result<()> {
|
||||
let metric = data_point.metric();
|
||||
async fn exec(&self, data_points: Vec<DataPoint>, _ctx: QueryContextRef) -> Result<usize> {
|
||||
let metric = data_points.first().unwrap().metric();
|
||||
if metric == "should_failed" {
|
||||
return server_error::InternalSnafu {
|
||||
err_msg: "expected",
|
||||
@@ -47,7 +47,7 @@ impl OpentsdbProtocolHandler for DummyOpentsdbInstance {
|
||||
}
|
||||
let i = metric.parse::<i32>().unwrap();
|
||||
let _ = self.tx.send(i * i).await;
|
||||
Ok(())
|
||||
Ok(data_points.len())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -245,11 +245,7 @@ impl ChunkReaderBuilder {
|
||||
reader_builder = reader_builder.push_batch_iter(iter);
|
||||
}
|
||||
|
||||
let predicate = Predicate::try_new(
|
||||
self.filters.clone(),
|
||||
self.schema.store_schema().schema().clone(),
|
||||
)
|
||||
.context(error::BuildPredicateSnafu)?;
|
||||
let predicate = Predicate::new(self.filters.clone());
|
||||
|
||||
let read_opts = ReadOptions {
|
||||
batch_size: self.iter_ctx.batch_size,
|
||||
|
||||
@@ -277,7 +277,10 @@ impl ParquetReader {
|
||||
|
||||
let pruned_row_groups = self
|
||||
.predicate
|
||||
.prune_row_groups(builder.metadata().row_groups())
|
||||
.prune_row_groups(
|
||||
builder.metadata().row_groups(),
|
||||
store_schema.schema().clone(),
|
||||
)
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, valid)| if valid { Some(idx) } else { None })
|
||||
@@ -549,12 +552,11 @@ mod tests {
|
||||
let operator = create_object_store(dir.path().to_str().unwrap());
|
||||
|
||||
let projected_schema = Arc::new(ProjectedSchema::new(schema, Some(vec![1])).unwrap());
|
||||
let user_schema = projected_schema.projected_user_schema().clone();
|
||||
let reader = ParquetReader::new(
|
||||
sst_file_handle,
|
||||
operator,
|
||||
projected_schema,
|
||||
Predicate::empty(user_schema),
|
||||
Predicate::empty(),
|
||||
TimestampRange::min_to_max(),
|
||||
);
|
||||
|
||||
@@ -636,12 +638,11 @@ mod tests {
|
||||
let operator = create_object_store(dir.path().to_str().unwrap());
|
||||
|
||||
let projected_schema = Arc::new(ProjectedSchema::new(schema, Some(vec![1])).unwrap());
|
||||
let user_schema = projected_schema.projected_user_schema().clone();
|
||||
let reader = ParquetReader::new(
|
||||
file_handle,
|
||||
operator,
|
||||
projected_schema,
|
||||
Predicate::empty(user_schema),
|
||||
Predicate::empty(),
|
||||
TimestampRange::min_to_max(),
|
||||
);
|
||||
|
||||
@@ -665,14 +666,8 @@ mod tests {
|
||||
range: TimestampRange,
|
||||
expect: Vec<i64>,
|
||||
) {
|
||||
let store_schema = schema.schema_to_read().clone();
|
||||
let reader = ParquetReader::new(
|
||||
file_handle,
|
||||
object_store,
|
||||
schema,
|
||||
Predicate::empty(store_schema.schema().clone()),
|
||||
range,
|
||||
);
|
||||
let reader =
|
||||
ParquetReader::new(file_handle, object_store, schema, Predicate::empty(), range);
|
||||
let mut stream = reader.chunk_stream().await.unwrap();
|
||||
let result = stream.next_batch().await;
|
||||
|
||||
|
||||
@@ -29,9 +29,11 @@ use datatypes::prelude::ConcreteDataType;
|
||||
use parquet::arrow::arrow_reader::{ArrowPredicate, RowFilter};
|
||||
use parquet::arrow::ProjectionMask;
|
||||
use parquet::schema::types::SchemaDescriptor;
|
||||
use snafu::ResultExt;
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::BuildPredicateSnafu;
|
||||
use crate::schema::StoreSchema;
|
||||
|
||||
/// Builds row filters according to predicates.
|
||||
@@ -80,7 +82,11 @@ pub(crate) fn build_row_filter(
|
||||
Box::new(PlainTimestampRowFilter::new(time_range, ts_col_projection)) as _
|
||||
};
|
||||
let mut predicates = vec![time_range_row_filter];
|
||||
if let Ok(datafusion_filters) = predicate_to_row_filter(predicate, projection_mask) {
|
||||
if let Ok(datafusion_filters) = predicate_to_row_filter(
|
||||
predicate,
|
||||
projection_mask,
|
||||
store_schema.schema().arrow_schema(),
|
||||
) {
|
||||
predicates.extend(datafusion_filters);
|
||||
}
|
||||
let filter = RowFilter::new(predicates);
|
||||
@@ -90,9 +96,13 @@ pub(crate) fn build_row_filter(
|
||||
fn predicate_to_row_filter(
|
||||
predicate: &Predicate,
|
||||
projection_mask: ProjectionMask,
|
||||
schema: &arrow::datatypes::SchemaRef,
|
||||
) -> error::Result<Vec<Box<dyn ArrowPredicate>>> {
|
||||
let mut datafusion_predicates = Vec::with_capacity(predicate.exprs().len());
|
||||
for expr in predicate.exprs() {
|
||||
let physical_exprs = predicate
|
||||
.to_physical_exprs(schema)
|
||||
.context(BuildPredicateSnafu)?;
|
||||
let mut datafusion_predicates = Vec::with_capacity(physical_exprs.len());
|
||||
for expr in &physical_exprs {
|
||||
datafusion_predicates.push(Box::new(DatafusionArrowPredicate {
|
||||
projection_mask: projection_mask.clone(),
|
||||
physical_expr: expr.clone(),
|
||||
|
||||
@@ -27,6 +27,7 @@ use datafusion_expr::expr::InList;
|
||||
use datafusion_expr::{Between, BinaryExpr, ColumnarValue, Operator};
|
||||
use datafusion_physical_expr::execution_props::ExecutionProps;
|
||||
use datafusion_physical_expr::{create_physical_expr, PhysicalExpr};
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::BooleanArray;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::value::scalar_value_to_timestamp;
|
||||
@@ -39,19 +40,24 @@ mod stats;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Predicate {
|
||||
/// The schema of the table that the expressions being applied.
|
||||
schema: SchemaRef,
|
||||
/// Physical expressions of this predicate.
|
||||
exprs: Vec<Arc<dyn PhysicalExpr>>,
|
||||
/// logical exprs
|
||||
exprs: Vec<Expr>,
|
||||
}
|
||||
|
||||
impl Predicate {
|
||||
/// Creates a new `Predicate` by converting logical exprs to physical exprs that can be
|
||||
/// evaluated against record batches.
|
||||
/// Returns error when failed to convert exprs.
|
||||
pub fn try_new(exprs: Vec<Expr>, schema: SchemaRef) -> error::Result<Self> {
|
||||
let arrow_schema = schema.arrow_schema();
|
||||
let df_schema = arrow_schema
|
||||
pub fn new(exprs: Vec<Expr>) -> Self {
|
||||
Self { exprs }
|
||||
}
|
||||
|
||||
/// Builds physical exprs according to provided schema.
|
||||
pub fn to_physical_exprs(
|
||||
&self,
|
||||
schema: &arrow::datatypes::SchemaRef,
|
||||
) -> error::Result<Vec<Arc<dyn PhysicalExpr>>> {
|
||||
let df_schema = schema
|
||||
.clone()
|
||||
.to_dfschema_ref()
|
||||
.context(error::DatafusionSnafu)?;
|
||||
@@ -61,47 +67,38 @@ impl Predicate {
|
||||
// registering variables.
|
||||
let execution_props = &ExecutionProps::new();
|
||||
|
||||
let physical_exprs = exprs
|
||||
self.exprs
|
||||
.iter()
|
||||
.map(|expr| {
|
||||
create_physical_expr(
|
||||
expr.df_expr(),
|
||||
df_schema.as_ref(),
|
||||
arrow_schema.as_ref(),
|
||||
execution_props,
|
||||
)
|
||||
create_physical_expr(expr.df_expr(), df_schema.as_ref(), schema, execution_props)
|
||||
})
|
||||
.collect::<Result<_, _>>()
|
||||
.context(error::DatafusionSnafu)?;
|
||||
|
||||
Ok(Self {
|
||||
schema,
|
||||
exprs: physical_exprs,
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn exprs(&self) -> &[Arc<dyn PhysicalExpr>] {
|
||||
&self.exprs
|
||||
.context(error::DatafusionSnafu)
|
||||
}
|
||||
|
||||
/// Builds an empty predicate from given schema.
|
||||
pub fn empty(schema: SchemaRef) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
exprs: vec![],
|
||||
}
|
||||
pub fn empty() -> Self {
|
||||
Self { exprs: vec![] }
|
||||
}
|
||||
|
||||
/// Evaluates the predicate against row group metadata.
|
||||
/// Returns a vector of boolean values, among which `false` means the row group can be skipped.
|
||||
pub fn prune_row_groups(&self, row_groups: &[RowGroupMetaData]) -> Vec<bool> {
|
||||
pub fn prune_row_groups(
|
||||
&self,
|
||||
row_groups: &[RowGroupMetaData],
|
||||
schema: SchemaRef,
|
||||
) -> Vec<bool> {
|
||||
let mut res = vec![true; row_groups.len()];
|
||||
let arrow_schema = self.schema.arrow_schema();
|
||||
for expr in &self.exprs {
|
||||
|
||||
let Ok(physical_exprs) = self.to_physical_exprs(schema.arrow_schema()) else {
|
||||
return res;
|
||||
};
|
||||
|
||||
let arrow_schema = schema.arrow_schema();
|
||||
for expr in &physical_exprs {
|
||||
match PruningPredicate::try_new(expr.clone(), arrow_schema.clone()) {
|
||||
Ok(p) => {
|
||||
let stat = RowGroupPruningStatistics::new(row_groups, &self.schema);
|
||||
let stat = RowGroupPruningStatistics::new(row_groups, &schema);
|
||||
match p.prune(&stat) {
|
||||
Ok(r) => {
|
||||
for (curr_val, res) in r.into_iter().zip(res.iter_mut()) {
|
||||
@@ -123,7 +120,9 @@ impl Predicate {
|
||||
|
||||
/// Prunes primary keys
|
||||
pub fn prune_primary_key(&self, primary_key: &RecordBatch) -> error::Result<bool> {
|
||||
for expr in &self.exprs {
|
||||
let pk_schema = primary_key.schema();
|
||||
let physical_exprs = self.to_physical_exprs(&pk_schema)?;
|
||||
for expr in &physical_exprs {
|
||||
// evaluate every filter against primary key
|
||||
let Ok(eva) = expr.evaluate(primary_key) else {
|
||||
continue;
|
||||
@@ -156,11 +155,22 @@ impl Predicate {
|
||||
|
||||
/// Evaluates the predicate against the `stats`.
|
||||
/// Returns a vector of boolean values, among which `false` means the row group can be skipped.
|
||||
pub fn prune_with_stats<S: PruningStatistics>(&self, stats: &S) -> Vec<bool> {
|
||||
pub fn prune_with_stats<S: PruningStatistics>(
|
||||
&self,
|
||||
stats: &S,
|
||||
schema: &arrow::datatypes::SchemaRef,
|
||||
) -> Vec<bool> {
|
||||
let mut res = vec![true; stats.num_containers()];
|
||||
let arrow_schema = self.schema.arrow_schema();
|
||||
for expr in &self.exprs {
|
||||
match PruningPredicate::try_new(expr.clone(), arrow_schema.clone()) {
|
||||
let physical_exprs = match self.to_physical_exprs(schema) {
|
||||
Ok(expr) => expr,
|
||||
Err(e) => {
|
||||
warn!(e; "Failed to build physical expr from predicates: {:?}", &self.exprs);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
for expr in &physical_exprs {
|
||||
match PruningPredicate::try_new(expr.clone(), schema.clone()) {
|
||||
Ok(p) => match p.prune(stats) {
|
||||
Ok(r) => {
|
||||
for (curr_val, res) in r.into_iter().zip(res.iter_mut()) {
|
||||
@@ -641,7 +651,7 @@ mod tests {
|
||||
let dir = create_temp_dir("prune_parquet");
|
||||
let (path, schema) = gen_test_parquet_file(&dir, array_cnt).await;
|
||||
let schema = Arc::new(datatypes::schema::Schema::try_from(schema).unwrap());
|
||||
let arrow_predicate = Predicate::try_new(filters, schema.clone()).unwrap();
|
||||
let arrow_predicate = Predicate::new(filters);
|
||||
let builder = ParquetRecordBatchStreamBuilder::new(
|
||||
tokio::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
@@ -653,7 +663,7 @@ mod tests {
|
||||
.unwrap();
|
||||
let metadata = builder.metadata().clone();
|
||||
let row_groups = metadata.row_groups();
|
||||
let res = arrow_predicate.prune_row_groups(row_groups);
|
||||
let res = arrow_predicate.prune_row_groups(row_groups, schema);
|
||||
assert_eq!(expect, res);
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,8 @@ mod tests {
|
||||
|
||||
async fn test_exec(instance: &Arc<Instance>) {
|
||||
let ctx = QueryContext::arc();
|
||||
|
||||
// should create new table "my_metric_1" directly
|
||||
let data_point1 = DataPoint::new(
|
||||
"my_metric_1".to_string(),
|
||||
1000,
|
||||
@@ -55,9 +57,8 @@ mod tests {
|
||||
("tagk2".to_string(), "tagv2".to_string()),
|
||||
],
|
||||
);
|
||||
// should create new table "my_metric_1" directly
|
||||
instance.exec(&data_point1, ctx.clone()).await.unwrap();
|
||||
|
||||
// should create new column "tagk3" directly
|
||||
let data_point2 = DataPoint::new(
|
||||
"my_metric_1".to_string(),
|
||||
2000,
|
||||
@@ -67,12 +68,12 @@ mod tests {
|
||||
("tagk3".to_string(), "tagv3".to_string()),
|
||||
],
|
||||
);
|
||||
// should create new column "tagk3" directly
|
||||
instance.exec(&data_point2, ctx.clone()).await.unwrap();
|
||||
|
||||
let data_point3 = DataPoint::new("my_metric_1".to_string(), 3000, 3.0, vec![]);
|
||||
// should handle null tags properly
|
||||
instance.exec(&data_point3, ctx.clone()).await.unwrap();
|
||||
let data_point3 = DataPoint::new("my_metric_1".to_string(), 3000, 3.0, vec![]);
|
||||
|
||||
let data_points = vec![data_point1, data_point2, data_point3];
|
||||
instance.exec(data_points, ctx.clone()).await.unwrap();
|
||||
|
||||
let output = instance
|
||||
.do_query(
|
||||
@@ -87,13 +88,13 @@ mod tests {
|
||||
let recordbatches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
let pretty_print = recordbatches.pretty_print().unwrap();
|
||||
let expected = vec![
|
||||
"+---------------------+----------------+-------+-------+-------+",
|
||||
"| greptime_timestamp | greptime_value | tagk1 | tagk2 | tagk3 |",
|
||||
"+---------------------+----------------+-------+-------+-------+",
|
||||
"| 1970-01-01T00:00:01 | 1.0 | tagv1 | tagv2 | |",
|
||||
"| 1970-01-01T00:00:02 | 2.0 | | tagv2 | tagv3 |",
|
||||
"| 1970-01-01T00:00:03 | 3.0 | | | |",
|
||||
"+---------------------+----------------+-------+-------+-------+",
|
||||
"+-------+-------+----------------+---------------------+-------+",
|
||||
"| tagk1 | tagk2 | greptime_value | greptime_timestamp | tagk3 |",
|
||||
"+-------+-------+----------------+---------------------+-------+",
|
||||
"| tagv1 | tagv2 | 1.0 | 1970-01-01T00:00:01 | |",
|
||||
"| | tagv2 | 2.0 | 1970-01-01T00:00:02 | tagv3 |",
|
||||
"| | | 3.0 | 1970-01-01T00:00:03 | |",
|
||||
"+-------+-------+----------------+---------------------+-------+",
|
||||
]
|
||||
.into_iter()
|
||||
.join("\n");
|
||||
|
||||
@@ -237,7 +237,7 @@ SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHER
|
||||
-- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1;
|
||||
SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1;
|
||||
|
||||
Error: 1003(Internal), Invalid argument error: must either specify a row count or at least one column
|
||||
Error: 3001(EngineExecuteQuery), Invalid argument error: must either specify a row count or at least one column
|
||||
|
||||
SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1;
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ Error: 3000(PlanQuery), Error during planning: Order by column out of bounds, sp
|
||||
-- Not work in greptimedb
|
||||
SELECT a FROM test ORDER BY 'hello', a;
|
||||
|
||||
Error: 1003(Internal), Error during planning: Sort operation is not applicable to scalar value hello
|
||||
Error: 3001(EngineExecuteQuery), Error during planning: Sort operation is not applicable to scalar value hello
|
||||
|
||||
-- Ambiguous reference in union alias, give and error in duckdb, but works in greptimedb
|
||||
SELECT a AS k, b FROM test UNION SELECT a, b AS k FROM test ORDER BY k;
|
||||
@@ -54,7 +54,7 @@ Error: 3000(PlanQuery), Error during planning: Order by column out of bounds, sp
|
||||
|
||||
SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY -1;
|
||||
|
||||
Error: 1003(Internal), Error during planning: Sort operation is not applicable to scalar value -1
|
||||
Error: 3001(EngineExecuteQuery), Error during planning: Sort operation is not applicable to scalar value -1
|
||||
|
||||
SELECT a % 2, b FROM test UNION SELECT a % 2 AS k FROM test ORDER BY -1;
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ Error: 2000(InvalidSyntax), sql parser error: Illegal Range select, no RANGE key
|
||||
|
||||
SELECT min(val) RANGE '10s', max(val) FROM host ALIGN '5s';
|
||||
|
||||
Error: 1003(Internal), No field named "MAX(host.val)". Valid fields are "MIN(host.val) RANGE 10s FILL NULL", host.ts, host.host.
|
||||
Error: 3001(EngineExecuteQuery), No field named "MAX(host.val)". Valid fields are "MIN(host.val) RANGE 10s FILL NULL", host.ts, host.host.
|
||||
|
||||
SELECT min(val) * 2 RANGE '10s' FROM host ALIGN '5s';
|
||||
|
||||
@@ -50,12 +50,12 @@ Error: 2000(InvalidSyntax), sql parser error: Can't use the RANGE keyword in Exp
|
||||
-- 2.2 no align param
|
||||
SELECT min(val) RANGE '5s' FROM host;
|
||||
|
||||
Error: 1003(Internal), Error during planning: Missing argument in range select query
|
||||
Error: 3000(PlanQuery), Error during planning: Missing argument in range select query
|
||||
|
||||
-- 2.3 type mismatch
|
||||
SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s';
|
||||
|
||||
Error: 1003(Internal), Internal error: Unsupported data type Int64 for function ceil. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
|
||||
Error: 3001(EngineExecuteQuery), Internal error: Unsupported data type Int64 for function ceil. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
|
||||
|
||||
-- 2.4 nest query
|
||||
SELECT min(max(val) RANGE '20s') RANGE '20s' FROM host ALIGN '10s';
|
||||
@@ -70,11 +70,11 @@ Error: 2000(InvalidSyntax), Range Query: Window functions is not allowed in Rang
|
||||
-- 2.6 invalid fill
|
||||
SELECT min(val) RANGE '5s', min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s';
|
||||
|
||||
Error: 1003(Internal), Schema contains duplicate unqualified field name "MIN(host.val) RANGE 5s FILL NULL"
|
||||
Error: 3001(EngineExecuteQuery), Schema contains duplicate unqualified field name "MIN(host.val) RANGE 5s FILL NULL"
|
||||
|
||||
SELECT min(val) RANGE '5s' FROM host ALIGN '5s' FILL 3.0;
|
||||
|
||||
Error: 1003(Internal), Error during planning: 3.0 is not a valid fill option, fail to convert to a const value. { Arrow error: Cast error: Cannot cast string '3.0' to value of Int64 type }
|
||||
Error: 3000(PlanQuery), Error during planning: 3.0 is not a valid fill option, fail to convert to a const value. { Arrow error: Cast error: Cannot cast string '3.0' to value of Int64 type }
|
||||
|
||||
DROP TABLE host;
|
||||
|
||||
|
||||
83
tests/cases/standalone/common/select/prune.result
Normal file
83
tests/cases/standalone/common/select/prune.result
Normal file
@@ -0,0 +1,83 @@
|
||||
create table demo(ts timestamp time index, `value` double, host string,idc string, collector string, primary key(host, idc, collector));
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into demo values(1,2,'test1', 'idc1', 'disk') ,(2,3,'test2', 'idc1', 'disk'), (3,4,'test3', 'idc2','memory');
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
select * from demo where host='test1';
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.001 | 2.0 | test1 | idc1 | disk |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where host='test2';
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.002 | 3.0 | test2 | idc1 | disk |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where host='test3';
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.003 | 4.0 | test3 | idc2 | memory |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where host='test2' and idc='idc1';
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.002 | 3.0 | test2 | idc1 | disk |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where host='test2' and idc='idc1' and collector='disk';
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.002 | 3.0 | test2 | idc1 | disk |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where host='test2' and idc='idc2';
|
||||
|
||||
++
|
||||
++
|
||||
|
||||
select * from demo where host='test3' and idc>'idc1';
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.003 | 4.0 | test3 | idc2 | memory |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where idc='idc1' order by ts;
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.001 | 2.0 | test1 | idc1 | disk |
|
||||
| 1970-01-01T00:00:00.002 | 3.0 | test2 | idc1 | disk |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
select * from demo where collector='disk' order by ts;
|
||||
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| ts | value | host | idc | collector |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
| 1970-01-01T00:00:00.001 | 2.0 | test1 | idc1 | disk |
|
||||
| 1970-01-01T00:00:00.002 | 3.0 | test2 | idc1 | disk |
|
||||
+-------------------------+-------+-------+------+-----------+
|
||||
|
||||
drop table demo;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
23
tests/cases/standalone/common/select/prune.sql
Normal file
23
tests/cases/standalone/common/select/prune.sql
Normal file
@@ -0,0 +1,23 @@
|
||||
create table demo(ts timestamp time index, `value` double, host string,idc string, collector string, primary key(host, idc, collector));
|
||||
|
||||
insert into demo values(1,2,'test1', 'idc1', 'disk') ,(2,3,'test2', 'idc1', 'disk'), (3,4,'test3', 'idc2','memory');
|
||||
|
||||
select * from demo where host='test1';
|
||||
|
||||
select * from demo where host='test2';
|
||||
|
||||
select * from demo where host='test3';
|
||||
|
||||
select * from demo where host='test2' and idc='idc1';
|
||||
|
||||
select * from demo where host='test2' and idc='idc1' and collector='disk';
|
||||
|
||||
select * from demo where host='test2' and idc='idc2';
|
||||
|
||||
select * from demo where host='test3' and idc>'idc1';
|
||||
|
||||
select * from demo where idc='idc1' order by ts;
|
||||
|
||||
select * from demo where collector='disk' order by ts;
|
||||
|
||||
drop table demo;
|
||||
@@ -249,11 +249,11 @@ SELECT TIMESTAMP '1992-09-20 11:30:00.123456' - interval_value as new_value from
|
||||
-- Interval type does not support aggregation functions.
|
||||
SELECT MIN(interval_value) from intervals;
|
||||
|
||||
Error: 1003(Internal), Internal error: Min/Max accumulator not implemented for type Interval(MonthDayNano). This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
|
||||
Error: 3001(EngineExecuteQuery), Internal error: Min/Max accumulator not implemented for type Interval(MonthDayNano). This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
|
||||
|
||||
SELECT MAX(interval_value) from intervals;
|
||||
|
||||
Error: 1003(Internal), Internal error: Min/Max accumulator not implemented for type Interval(MonthDayNano). This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
|
||||
Error: 3001(EngineExecuteQuery), Internal error: Min/Max accumulator not implemented for type Interval(MonthDayNano). This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
|
||||
|
||||
SELECT SUM(interval_value) from intervals;
|
||||
|
||||
|
||||
@@ -83,19 +83,19 @@ Error: 3000(PlanQuery), Error during planning: The function Avg does not support
|
||||
|
||||
SELECT t+t FROM timestamp;
|
||||
|
||||
Error: 1003(Internal), Cast error: Cannot perform arithmetic operation between array of type Timestamp(Millisecond, None) and array of type Timestamp(Millisecond, None)
|
||||
Error: 3001(EngineExecuteQuery), Cast error: Cannot perform arithmetic operation between array of type Timestamp(Millisecond, None) and array of type Timestamp(Millisecond, None)
|
||||
|
||||
SELECT t*t FROM timestamp;
|
||||
|
||||
Error: 1003(Internal), Invalid argument error: column types must match schema types, expected Interval(DayTime) but found Timestamp(Millisecond, None) at column index 0
|
||||
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Interval(DayTime) but found Timestamp(Millisecond, None) at column index 0
|
||||
|
||||
SELECT t/t FROM timestamp;
|
||||
|
||||
Error: 1003(Internal), Invalid argument error: column types must match schema types, expected Interval(DayTime) but found Timestamp(Millisecond, None) at column index 0
|
||||
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Interval(DayTime) but found Timestamp(Millisecond, None) at column index 0
|
||||
|
||||
SELECT t%t FROM timestamp;
|
||||
|
||||
Error: 1003(Internal), Invalid argument error: column types must match schema types, expected Interval(DayTime) but found Timestamp(Millisecond, None) at column index 0
|
||||
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Interval(DayTime) but found Timestamp(Millisecond, None) at column index 0
|
||||
|
||||
-- TODO(dennis): It can't run on distributed mode, uncomment it when the issue is fixed: https://github.com/GreptimeTeam/greptimedb/issues/2071 --
|
||||
-- SELECT t-t FROM timestamp; --
|
||||
|
||||
@@ -23,7 +23,7 @@ SELECT CAST(sec AS VARCHAR), CAST(msec AS VARCHAR), CAST(micros AS VARCHAR), CAS
|
||||
|
||||
SELECT EXTRACT(MICROSECONDS FROM sec), EXTRACT(MICROSECONDS FROM msec), EXTRACT(MICROSECONDS FROM micros), EXTRACT(MICROSECONDS FROM nanos) FROM ts_precision;
|
||||
|
||||
Error: 1003(Internal), Execution error: Date part 'MICROSECONDS' not supported
|
||||
Error: 3001(EngineExecuteQuery), Execution error: Date part 'MICROSECONDS' not supported
|
||||
|
||||
-- we only support precisions 0, 3, 6, and 9
|
||||
-- any other precision is rounded up (e.g. 1/2 -> 3, 4/5 -> 6, 7/8 -> 9)
|
||||
|
||||
@@ -4,12 +4,11 @@ require_lease_before_startup = true
|
||||
rpc_addr = '127.0.0.1:4100'
|
||||
rpc_hostname = '127.0.0.1'
|
||||
rpc_runtime_size = 8
|
||||
require_lease_before_startup = true
|
||||
|
||||
[wal]
|
||||
file_size = '1GB'
|
||||
purge_interval = '10m'
|
||||
purge_threshold = '50GB'
|
||||
purge_threshold = '10GB'
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ require_lease_before_startup = true
|
||||
[wal]
|
||||
file_size = '1GB'
|
||||
purge_interval = '10m'
|
||||
purge_threshold = '50GB'
|
||||
purge_threshold = '10GB'
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
|
||||
@@ -235,6 +235,8 @@ impl Env {
|
||||
args.push(format!("--http-addr=127.0.0.1:430{id}"));
|
||||
args.push(format!("--data-home={}", data_home.display()));
|
||||
args.push(format!("--node-id={id}"));
|
||||
args.push("-c".to_string());
|
||||
args.push(self.generate_config_file(subcommand, db_ctx));
|
||||
args.push("--metasrv-addr=127.0.0.1:3002".to_string());
|
||||
(args, format!("127.0.0.1:410{id}"))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user