Compare commits

..

18 Commits

Author SHA1 Message Date
discord9
8ca3d72e3f feat(exp): adjust_flow admin function 2025-06-05 13:52:08 +08:00
Weny Xu
80c5af0ecf fix: ignore incomplete WAL entries during read (#6251)
* fix: ignore incomplete entry

* fix: fix unit tests
2025-06-04 11:16:42 +00:00
LFC
7afb77fd35 fix: add "query" options to standalone (#6248) 2025-06-04 08:47:31 +00:00
discord9
0b9af77fe9 chore: test sleep longer (#6247)
* chore: test sleep longer

* win timer resolution is 15.6ms, need longer
2025-06-04 08:18:44 +00:00
discord9
cbafb6e00b feat(flow): flow streaming mode in list expr support (#6229)
* feat: flow streaming in list support

* chore: per review

* chore: per review

* fix: expr correct type
2025-06-04 08:05:20 +00:00
LFC
744a754246 fix: add missing features (#6245) 2025-06-04 07:13:39 +00:00
fys
9cd4a2c525 feat: add trigger ddl manager (#6228)
* feat: add trigger ddl manager

* chore: reduce the number of cfg feature code blocks

* upgrade greptime-proto

* chore: upgrade greptime-proto
2025-06-04 06:38:02 +00:00
liyang
180920327b ci: add option to choose whether upload artifacts to S3 in the development build (#6232)
ci: add option to choose whether to upload artifacts to S3 in the development build
2025-06-04 03:49:53 +00:00
Yingwen
ee4f830be6 fix: do not accommodate fields for multi-value protocol (#6237) 2025-06-04 01:10:52 +00:00
shuiyisong
69975f1f71 feat: pipeline with insert options (#6192)
* feat: pipeline recognize hints from exec

* chore: rename and add test

* chore: minor improve

* chore: rename and add comments

* fix: typos

* chore: remove unnecessory clone fn

* chore: group metrics

* chore: use struct in transform output enum

* chore: update hint prefix
2025-06-03 18:46:48 +00:00
discord9
38cac301f2 refactor(flow): limit the size of query (#6216)
* refactor: not wait for slow query

* chore: clippy

* chore: fmt

* WIP: time range lock

* WIP

* refactor: rm over-complicated query pool

* chore: add more metrics& rm sql from slow query metrics
2025-06-03 12:27:07 +00:00
Yuhan Wang
083c22b90a refactor: extract some common functions and structs in election module (#6172)
* refactor: extract some common functions and structs in election module

* chore: add comments and modify a function name

* chore: add comments and modify a function name

* fix: missing 2 lines in license header

* fix: acqrel

* chore: apply comment suggestions

* Update src/meta-srv/src/election.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-06-03 11:31:30 +00:00
Lei, HUANG
fdd164c0fa fix(mito): revert initial builder capacity for TimeSeriesMemtable (#6231)
* fix/initial-builder-cap:
 ### Enhance Series Initialization and Capacity Management

 - **`simple_bulk_memtable.rs`**: Updated the `Series` initialization to use `with_capacity` with a specified capacity of 8192, improving memory management.
 - **`time_series.rs`**: Introduced `with_capacity` method in `Series` to allow custom initial capacity for `ValueBuilder`. Adjusted `INITIAL_BUILDER_CAPACITY` to 16 for more efficient memory usage. Added a new `new` method to maintain backward compatibility.

* fix/initial-builder-cap:
 ### Adjust Memory Allocation in Memtable

 - **`simple_bulk_memtable.rs`**: Reduced the initial capacity of `Series` from 8192 to 1024 to optimize memory usage.
 - **`time_series.rs`**: Decreased `INITIAL_BUILDER_CAPACITY` from 16 to 4 to improve efficiency in vector building.
2025-06-03 08:25:02 +00:00
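The commit message above describes giving `Series` a configurable initial capacity plus a backward-compatible `new`. A minimal sketch of that pattern follows, with names simplified and a plain `Vec` standing in for the real `ValueBuilder`; it is an illustration of the idea, not the actual mito2 code.

```rust
// Sketch only: a builder whose initial allocation is configurable, with a
// `new` that keeps the small default so existing call sites stay unchanged.
const INITIAL_BUILDER_CAPACITY: usize = 4;

struct ValueBuilder {
    values: Vec<i64>, // stand-in for the real typed builders
}

struct Series {
    builder: ValueBuilder,
}

impl Series {
    /// Backward-compatible constructor using the default capacity.
    fn new() -> Self {
        Self::with_capacity(INITIAL_BUILDER_CAPACITY)
    }

    /// Lets hot paths (e.g. a bulk memtable) pick a larger initial capacity
    /// such as 1024 to avoid repeated reallocations.
    fn with_capacity(capacity: usize) -> Self {
        Self {
            builder: ValueBuilder {
                values: Vec::with_capacity(capacity),
            },
        }
    }
}

fn main() {
    let default_series = Series::new();
    let bulk_series = Series::with_capacity(1024);
    assert!(default_series.builder.values.capacity() >= INITIAL_BUILDER_CAPACITY);
    assert!(bulk_series.builder.values.capacity() >= 1024);
}
```

The point of the two constructors is that bulk-ingest paths can over-allocate up front while ordinary call sites keep the small default.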
Zhenchi
078afb2bd6 feat: bloom filter index applier support or eq chain (#6227)
* feat: bloom filter index applier support or eq chain

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-06-03 08:08:19 +00:00
localhost
477e4cc344 chore: add pg mysql be default feature in cli (#6230) 2025-06-03 07:09:26 +00:00
Lei, HUANG
078d83cec2 chore: add some metrics to grafana dashboard (#6169)
* add compaction elapsed time avg and bulk request convert elapsed time to grafana dashboard

* fix: standalone dashboard conversion

* chore: newline

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-06-03 03:33:11 +00:00
liyang
7705d84d83 docs: fix bad link (#6222)
* docs: fix bad link

* Update how-to-profile-memory.md
2025-06-03 03:19:10 +00:00
dennis zhuang
0d81400bb4 feat: supports select @@session.time_zone (#6212) 2025-06-03 02:32:19 +00:00
92 changed files with 2799 additions and 747 deletions

View File

@@ -55,6 +55,11 @@ on:
description: Build and push images to DockerHub and ACR
required: false
default: true
upload_artifacts_to_s3:
type: boolean
description: Whether upload artifacts to s3
required: false
default: false
cargo_profile:
type: choice
description: The cargo profile to use in building GreptimeDB.
@@ -238,7 +243,7 @@ jobs:
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
dev-mode: true # Only build the standard images.
- name: Echo Docker image tag to step summary
run: |
echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
@@ -281,7 +286,7 @@ jobs:
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-to-s3: false
upload-to-s3: ${{ inputs.upload_artifacts_to_s3 }}
dev-mode: true # Only build the standard images(exclude centos images).
push-latest-tag: false # Don't push the latest tag to registry.
update-version-info: false # Don't update the version info in S3.

Cargo.lock (generated)
View File

@@ -4876,7 +4876,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=442348b2518c0bf187fb1ad011ba370c38b96cc4#442348b2518c0bf187fb1ad011ba370c38b96cc4"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=2dca1dc67862d7b410838aef81232274c019b3f6#2dca1dc67862d7b410838aef81232274c019b3f6"
dependencies = [
"prost 0.13.5",
"serde",

View File

@@ -132,7 +132,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "442348b2518c0bf187fb1ad011ba370c38b96cc4" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "2dca1dc67862d7b410838aef81232274c019b3f6" }
hex = "0.4"
http = "1"
humantime = "2.1"

View File

@@ -1,6 +1,6 @@
# Profile memory usage of GreptimeDB
This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](docs/how-to/memory-profile-scripts).
This crate provides an easy approach to dump memory profiling info. A set of ready to use scripts is provided in [docs/how-to/memory-profile-scripts](./memory-profile-scripts/scripts).
## Prerequisites
### jemalloc

View File

@@ -25,7 +25,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 5,
"id": 7,
"links": [],
"panels": [
{
@@ -4476,7 +4476,7 @@
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
@@ -4553,9 +4553,22 @@
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-p99",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"editorMode": "code",
"expr": "sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~\"$datanode\"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~\"$datanode\"}[$__rate_interval]))",
"hide": false,
"instant": false,
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-avg",
"range": true,
"refId": "B"
}
],
"title": "Compaction P99 per Instance by Stage",
"title": "Compaction Elapsed Time per Instance by Stage",
"type": "timeseries"
},
{
@@ -5546,13 +5559,131 @@
"title": "Region Worker Handle Bulk Insert Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"description": "Per-stage elapsed time for region worker to decode requests.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 117
},
"id": 338,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-P95",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"editorMode": "code",
"expr": "sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))",
"hide": false,
"instant": false,
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG",
"range": true,
"refId": "B"
}
],
"title": "Region Worker Convert Requests",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 117
"y": 125
},
"id": 313,
"panels": [
@@ -6682,7 +6813,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 118
"y": 126
},
"id": 324,
"panels": [
@@ -6979,7 +7110,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 119
"y": 127
},
"id": 328,
"panels": [
@@ -7627,6 +7758,5 @@
"timezone": "",
"title": "GreptimeDB",
"uid": "dejf3k5e7g2kgb",
"version": 3,
"weekStart": ""
"version": 3
}

View File

@@ -60,7 +60,7 @@
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))`<br/>`sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{instance=~"$datanode"}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{instance=~"$datanode"}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
@@ -70,6 +70,7 @@
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`<br/>`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |

View File

@@ -487,7 +487,7 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
- title: Compaction Elapsed Time per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
@@ -497,6 +497,11 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- expr: sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{instance=~"$datanode"}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{instance=~"$datanode"}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
@@ -607,6 +612,21 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
- title: Region Worker Convert Requests
type: timeseries
description: Per-stage elapsed time for region worker to decode requests.
unit: s
queries:
- expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
- expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
- title: OpenDAL
panels:
- title: QPS per Instance

View File

@@ -25,7 +25,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 5,
"id": 7,
"links": [],
"panels": [
{
@@ -4476,7 +4476,7 @@
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
@@ -4553,9 +4553,22 @@
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-p99",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"editorMode": "code",
"expr": "sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))",
"hide": false,
"instant": false,
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-avg",
"range": true,
"refId": "B"
}
],
"title": "Compaction P99 per Instance by Stage",
"title": "Compaction Elapsed Time per Instance by Stage",
"type": "timeseries"
},
{
@@ -5546,13 +5559,131 @@
"title": "Region Worker Handle Bulk Insert Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"description": "Per-stage elapsed time for region worker to decode requests.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "points",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 117
},
"id": 338,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-P95",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${metrics}"
},
"editorMode": "code",
"expr": "sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))",
"hide": false,
"instant": false,
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG",
"range": true,
"refId": "B"
}
],
"title": "Region Worker Convert Requests",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 117
"y": 125
},
"id": 313,
"panels": [
@@ -6682,7 +6813,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 118
"y": 126
},
"id": 324,
"panels": [
@@ -6979,7 +7110,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 119
"y": 127
},
"id": 328,
"panels": [
@@ -7627,6 +7758,5 @@
"timezone": "",
"title": "GreptimeDB",
"uid": "dejf3k5e7g2kgb",
"version": 3,
"weekStart": ""
"version": 3
}

View File

@@ -60,7 +60,7 @@
| Read Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_read_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Read Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Write Stage P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_write_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Write Stage P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]` |
| Compaction OPS per Instance | `sum by(instance, pod) (rate(greptime_mito_compaction_total_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction OPS per Instance. | `prometheus` | `ops` | `[{{ instance }}]-[{{pod}}]` |
| Compaction P99 per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction Elapsed Time per Instance by Stage | `histogram_quantile(0.99, sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_bucket{}[$__rate_interval])))`<br/>`sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))` | `timeseries` | Compaction latency by stage | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-p99` |
| Compaction P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le,stage) (rate(greptime_mito_compaction_total_elapsed_bucket{}[$__rate_interval])))` | `timeseries` | Compaction P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-compaction` |
| WAL write size | `histogram_quantile(0.95, sum by(le,instance, pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`histogram_quantile(0.99, sum by(le,instance,pod) (rate(raft_engine_write_size_bucket[$__rate_interval])))`<br/>`sum by (instance, pod)(rate(raft_engine_write_size_sum[$__rate_interval]))` | `timeseries` | Write-ahead logs write size as bytes. This chart includes stats of p95 and p99 size by instance, total WAL write rate. | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-req-size-p95` |
| Cached Bytes per Instance | `greptime_mito_cache_bytes{}` | `timeseries` | Cached Bytes per Instance. | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
@@ -70,6 +70,7 @@
| Inflight Flush | `greptime_mito_inflight_flush_count` | `timeseries` | Ongoing flush task count | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]` |
| Compaction Input/Output Bytes | `sum by(instance, pod) (greptime_mito_compaction_input_bytes)`<br/>`sum by(instance, pod) (greptime_mito_compaction_output_bytes)` | `timeseries` | Compaction oinput output bytes | `prometheus` | `bytes` | `[{{instance}}]-[{{pod}}]-input` |
| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
# OpenDAL
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
| --- | --- | --- | --- | --- | --- | --- |

View File

@@ -487,7 +487,7 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{ instance }}]-[{{pod}}]'
- title: Compaction P99 per Instance by Stage
- title: Compaction Elapsed Time per Instance by Stage
type: timeseries
description: Compaction latency by stage
unit: s
@@ -497,6 +497,11 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-p99'
- expr: sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_sum{}[$__rate_interval]))/sum by(instance, pod, le, stage) (rate(greptime_mito_compaction_stage_elapsed_count{}[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-avg'
- title: Compaction P99 per Instance
type: timeseries
description: Compaction P99 per Instance.
@@ -607,6 +612,21 @@ groups:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
- title: Region Worker Convert Requests
type: timeseries
description: Per-stage elapsed time for region worker to decode requests.
unit: s
queries:
- expr: histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-P95'
- expr: sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))
datasource:
type: prometheus
uid: ${metrics}
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
- title: OpenDAL
panels:
- title: QPS per Instance

View File

@@ -6,7 +6,7 @@ DAC_IMAGE=ghcr.io/zyy17/dac:20250423-522bd35
remove_instance_filters() {
# Remove the instance filters for the standalone dashboards.
sed 's/instance=~\\"$datanode\\",//; s/instance=~\\"$datanode\\"//; s/instance=~\\"$frontend\\",//; s/instance=~\\"$frontend\\"//; s/instance=~\\"$metasrv\\",//; s/instance=~\\"$metasrv\\"//; s/instance=~\\"$flownode\\",//; s/instance=~\\"$flownode\\"//;' $CLUSTER_DASHBOARD_DIR/dashboard.json > $STANDALONE_DASHBOARD_DIR/dashboard.json
sed -E 's/instance=~\\"(\$datanode|\$frontend|\$metasrv|\$flownode)\\",?//g' "$CLUSTER_DASHBOARD_DIR/dashboard.json" > "$STANDALONE_DASHBOARD_DIR/dashboard.json"
}
generate_intermediate_dashboards_and_docs() {

View File

@@ -27,6 +27,8 @@ excludes = [
"src/servers/src/repeated_field.rs",
"src/servers/src/http/test_helpers.rs",
# enterprise
"src/common/meta/src/rpc/ddl/trigger.rs",
"src/operator/src/expr_helper/trigger.rs",
"src/sql/src/statements/create/trigger.rs",
"src/sql/src/statements/show/trigger.rs",
"src/sql/src/parsers/create_parser/trigger.rs",

View File

@@ -5,8 +5,12 @@ edition.workspace = true
license.workspace = true
[features]
pg_kvbackend = ["common-meta/pg_kvbackend"]
mysql_kvbackend = ["common-meta/mysql_kvbackend"]
default = [
"pg_kvbackend",
"mysql_kvbackend",
]
pg_kvbackend = ["common-meta/pg_kvbackend", "meta-srv/pg_kvbackend"]
mysql_kvbackend = ["common-meta/mysql_kvbackend", "meta-srv/mysql_kvbackend"]
[lints]
workspace = true

View File

@@ -10,7 +10,13 @@ name = "greptime"
path = "src/bin/greptime.rs"
[features]
default = ["servers/pprof", "servers/mem-prof", "meta-srv/pg_kvbackend", "meta-srv/mysql_kvbackend"]
default = [
"servers/pprof",
"servers/mem-prof",
"meta-srv/pg_kvbackend",
"meta-srv/mysql_kvbackend",
]
enterprise = ["common-meta/enterprise", "frontend/enterprise", "meta-srv/enterprise"]
tokio-console = ["common-telemetry/tokio-console"]
[lints]

View File

@@ -35,6 +35,8 @@ use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRe
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
use common_meta::ddl_manager::DdlManager;
#[cfg(feature = "enterprise")]
use common_meta::ddl_manager::TriggerDdlManagerRef;
use common_meta::key::flow::flow_state::FlowStat;
use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
@@ -69,6 +71,7 @@ use frontend::service_config::{
};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
use query::options::QueryOptions;
use serde::{Deserialize, Serialize};
use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
use servers::grpc::GrpcOptions;
@@ -153,6 +156,7 @@ pub struct StandaloneOptions {
pub init_regions_parallelism: usize,
pub max_in_flight_write_bytes: Option<ReadableSize>,
pub slow_query: Option<SlowQueryOptions>,
pub query: QueryOptions,
}
impl Default for StandaloneOptions {
@@ -185,6 +189,7 @@ impl Default for StandaloneOptions {
init_regions_parallelism: 16,
max_in_flight_write_bytes: None,
slow_query: Some(SlowQueryOptions::default()),
query: QueryOptions::default(),
}
}
}
@@ -240,6 +245,7 @@ impl StandaloneOptions {
grpc: cloned_opts.grpc,
init_regions_in_background: cloned_opts.init_regions_in_background,
init_regions_parallelism: cloned_opts.init_regions_parallelism,
query: cloned_opts.query,
..Default::default()
}
}
@@ -579,6 +585,8 @@ impl StartCommand {
flow_id_sequence,
));
#[cfg(feature = "enterprise")]
let trigger_ddl_manager: Option<TriggerDdlManagerRef> = plugins.get();
let ddl_task_executor = Self::create_ddl_task_executor(
procedure_manager.clone(),
node_manager.clone(),
@@ -587,6 +595,8 @@ impl StartCommand {
table_meta_allocator,
flow_metadata_manager,
flow_meta_allocator,
#[cfg(feature = "enterprise")]
trigger_ddl_manager,
)
.await?;
@@ -651,6 +661,7 @@ impl StartCommand {
})
}
#[allow(clippy::too_many_arguments)]
pub async fn create_ddl_task_executor(
procedure_manager: ProcedureManagerRef,
node_manager: NodeManagerRef,
@@ -659,6 +670,7 @@ impl StartCommand {
table_metadata_allocator: TableMetadataAllocatorRef,
flow_metadata_manager: FlowMetadataManagerRef,
flow_metadata_allocator: FlowMetadataAllocatorRef,
#[cfg(feature = "enterprise")] trigger_ddl_manager: Option<TriggerDdlManagerRef>,
) -> Result<ProcedureExecutorRef> {
let procedure_executor: ProcedureExecutorRef = Arc::new(
DdlManager::try_new(
@@ -675,6 +687,8 @@ impl StartCommand {
},
procedure_manager,
true,
#[cfg(feature = "enterprise")]
trigger_ddl_manager,
)
.context(error::InitDdlManagerSnafu)?,
);

View File

@@ -0,0 +1,90 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_macro::admin_fn;
use common_query::error::{
InvalidFuncArgsSnafu, MissingFlowServiceHandlerSnafu, Result, UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::value::{Value, ValueRef};
use session::context::QueryContextRef;
use snafu::ensure;
use store_api::storage::ConcreteDataType;
use crate::handlers::FlowServiceHandlerRef;
use crate::helper::parse_catalog_flow;
fn adjust_signature() -> Signature {
Signature::exact(
vec![
ConcreteDataType::string_datatype(), // flow name
ConcreteDataType::uint64_datatype(), // min_run_interval in seconds
ConcreteDataType::uint64_datatype(), // max filter number per query
],
Volatility::Immutable,
)
}
#[admin_fn(
name = AdjustFlowFunction,
display_name = adjust_flow,
sig_fn = adjust_signature,
ret = uint64
)]
pub(crate) async fn adjust_flow(
flow_service_handler: &FlowServiceHandlerRef,
query_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
ensure!(
params.len() == 3,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect 3, have: {}",
params.len()
),
}
);
let (flow_name, min_run_interval, max_filter_num) = match (params[0], params[1], params[2]) {
(
ValueRef::String(flow_name),
ValueRef::UInt64(min_run_interval),
ValueRef::UInt64(max_filter_num),
) => (flow_name, min_run_interval, max_filter_num),
_ => {
return UnsupportedInputDataTypeSnafu {
function: "adjust_flow",
datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
}
.fail();
}
};
let (catalog_name, flow_name) = parse_catalog_flow(flow_name, query_ctx)?;
let res = flow_service_handler
.adjust(
&catalog_name,
&flow_name,
min_run_interval,
max_filter_num as usize,
query_ctx.clone(),
)
.await?;
let affected_rows = res.affected_rows;
Ok(Value::from(affected_rows))
}

View File

@@ -26,6 +26,7 @@ use flush_compact_table::{CompactTableFunction, FlushTableFunction};
use migrate_region::MigrateRegionFunction;
use remove_region_follower::RemoveRegionFollowerFunction;
use crate::adjust_flow::AdjustFlowFunction;
use crate::flush_flow::FlushFlowFunction;
use crate::function_registry::FunctionRegistry;
@@ -43,5 +44,6 @@ impl AdminFunction {
registry.register_async(Arc::new(FlushTableFunction));
registry.register_async(Arc::new(CompactTableFunction));
registry.register_async(Arc::new(FlushFlowFunction));
registry.register_async(Arc::new(AdjustFlowFunction));
}
}

View File

@@ -12,21 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::ext::BoxedError;
use common_macro::admin_fn;
use common_query::error::{
ExecuteSnafu, InvalidFuncArgsSnafu, MissingFlowServiceHandlerSnafu, Result,
UnsupportedInputDataTypeSnafu,
InvalidFuncArgsSnafu, MissingFlowServiceHandlerSnafu, Result, UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::value::{Value, ValueRef};
use session::context::QueryContextRef;
use snafu::{ensure, ResultExt};
use sql::parser::ParserContext;
use snafu::ensure;
use store_api::storage::ConcreteDataType;
use crate::handlers::FlowServiceHandlerRef;
use crate::helper::parse_catalog_flow;
fn flush_signature() -> Signature {
Signature::uniform(
@@ -47,20 +45,6 @@ pub(crate) async fn flush_flow(
query_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
let (catalog_name, flow_name) = parse_flush_flow(params, query_ctx)?;
let res = flow_service_handler
.flush(&catalog_name, &flow_name, query_ctx.clone())
.await?;
let affected_rows = res.affected_rows;
Ok(Value::from(affected_rows))
}
fn parse_flush_flow(
params: &[ValueRef<'_>],
query_ctx: &QueryContextRef,
) -> Result<(String, String)> {
ensure!(
params.len() == 1,
InvalidFuncArgsSnafu {
@@ -70,7 +54,6 @@ fn parse_flush_flow(
),
}
);
let ValueRef::String(flow_name) = params[0] else {
return UnsupportedInputDataTypeSnafu {
function: "flush_flow",
@@ -78,27 +61,14 @@ fn parse_flush_flow(
}
.fail();
};
let obj_name = ParserContext::parse_table_name(flow_name, query_ctx.sql_dialect())
.map_err(BoxedError::new)
.context(ExecuteSnafu)?;
let (catalog_name, flow_name) = parse_catalog_flow(flow_name, query_ctx)?;
let (catalog_name, flow_name) = match &obj_name.0[..] {
[flow_name] => (
query_ctx.current_catalog().to_string(),
flow_name.value.clone(),
),
[catalog, flow_name] => (catalog.value.clone(), flow_name.value.clone()),
_ => {
return InvalidFuncArgsSnafu {
err_msg: format!(
"expect flow name to be <catalog>.<flow-name> or <flow-name>, actual: {}",
obj_name
),
}
.fail()
}
};
Ok((catalog_name, flow_name))
let res = flow_service_handler
.flush(&catalog_name, &flow_name, query_ctx.clone())
.await?;
let affected_rows = res.affected_rows;
Ok(Value::from(affected_rows))
}
#[cfg(test)]
@@ -154,10 +124,7 @@ mod test {
("catalog.flow_name", ("catalog", "flow_name")),
];
for (input, expected) in testcases.iter() {
let args = vec![*input];
let args = args.into_iter().map(ValueRef::String).collect::<Vec<_>>();
let result = parse_flush_flow(&args, &QueryContext::arc()).unwrap();
let result = parse_catalog_flow(input, &QueryContext::arc()).unwrap();
assert_eq!(*expected, (result.0.as_str(), result.1.as_str()));
}
}

View File

@@ -87,6 +87,15 @@ pub trait FlowServiceHandler: Send + Sync {
flow: &str,
ctx: QueryContextRef,
) -> Result<api::v1::flow::FlowResponse>;
async fn adjust(
&self,
catalog: &str,
flow: &str,
min_run_interval_secs: u64,
max_filter_num_per_query: usize,
ctx: QueryContextRef,
) -> Result<api::v1::flow::FlowResponse>;
}
pub type TableMutationHandlerRef = Arc<dyn TableMutationHandler>;

View File

@@ -12,12 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_query::error::{InvalidInputTypeSnafu, Result};
use common_error::ext::BoxedError;
use common_query::error::{ExecuteSnafu, InvalidFuncArgsSnafu, InvalidInputTypeSnafu, Result};
use common_query::prelude::{Signature, TypeSignature, Volatility};
use datatypes::prelude::ConcreteDataType;
use datatypes::types::cast::cast;
use datatypes::value::ValueRef;
use session::context::QueryContextRef;
use snafu::ResultExt;
use sql::parser::ParserContext;
/// Create a function signature with oneof signatures of interleaving two arguments.
pub fn one_of_sigs2(args1: Vec<ConcreteDataType>, args2: Vec<ConcreteDataType>) -> Signature {
@@ -43,3 +46,30 @@ pub fn cast_u64(value: &ValueRef) -> Result<Option<u64>> {
})
.map(|v| v.as_u64())
}
pub fn parse_catalog_flow(
flow_name: &str,
query_ctx: &QueryContextRef,
) -> Result<(String, String)> {
let obj_name = ParserContext::parse_table_name(flow_name, query_ctx.sql_dialect())
.map_err(BoxedError::new)
.context(ExecuteSnafu)?;
let (catalog_name, flow_name) = match &obj_name.0[..] {
[flow_name] => (
query_ctx.current_catalog().to_string(),
flow_name.value.clone(),
),
[catalog, flow_name] => (catalog.value.clone(), flow_name.value.clone()),
_ => {
return InvalidFuncArgsSnafu {
err_msg: format!(
"expect flow name to be <catalog>.<flow-name> or <flow-name>, actual: {}",
obj_name
),
}
.fail()
}
};
Ok((catalog_name, flow_name))
}

View File

@@ -15,6 +15,7 @@
#![feature(let_chains)]
#![feature(try_blocks)]
mod adjust_flow;
mod admin;
mod flush_flow;
mod macros;

View File

@@ -148,6 +148,17 @@ impl FunctionState {
) -> Result<api::v1::flow::FlowResponse> {
todo!()
}
async fn adjust(
&self,
_catalog: &str,
_flow: &str,
_min_run_interval_secs: u64,
_max_filter_num_per_query: usize,
_ctx: QueryContextRef,
) -> Result<api::v1::flow::FlowResponse> {
todo!()
}
}
Self {

View File

@@ -8,6 +8,7 @@ license.workspace = true
testing = []
pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
mysql_kvbackend = ["dep:sqlx", "dep:backon"]
enterprise = []
[lints]
workspace = true

View File

@@ -47,6 +47,10 @@ use crate::error::{
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
#[cfg(feature = "enterprise")]
use crate::rpc::ddl::trigger::CreateTriggerTask;
#[cfg(feature = "enterprise")]
use crate::rpc::ddl::DdlTask::CreateTrigger;
use crate::rpc::ddl::DdlTask::{
AlterDatabase, AlterLogicalTables, AlterTable, CreateDatabase, CreateFlow, CreateLogicalTables,
CreateTable, CreateView, DropDatabase, DropFlow, DropLogicalTables, DropTable, DropView,
@@ -70,8 +74,29 @@ pub type BoxedProcedureLoaderFactory = dyn Fn(DdlContext) -> BoxedProcedureLoade
pub struct DdlManager {
ddl_context: DdlContext,
procedure_manager: ProcedureManagerRef,
#[cfg(feature = "enterprise")]
trigger_ddl_manager: Option<TriggerDdlManagerRef>,
}
/// This trait is responsible for handling DDL tasks about triggers. e.g.,
/// create trigger, drop trigger, etc.
#[cfg(feature = "enterprise")]
#[async_trait::async_trait]
pub trait TriggerDdlManager: Send + Sync {
async fn create_trigger(
&self,
create_trigger_task: CreateTriggerTask,
procedure_manager: ProcedureManagerRef,
ddl_context: DdlContext,
query_context: QueryContext,
) -> Result<SubmitDdlTaskResponse>;
fn as_any(&self) -> &dyn std::any::Any;
}
#[cfg(feature = "enterprise")]
pub type TriggerDdlManagerRef = Arc<dyn TriggerDdlManager>;
macro_rules! procedure_loader_entry {
($procedure:ident) => {
(
@@ -100,10 +125,13 @@ impl DdlManager {
ddl_context: DdlContext,
procedure_manager: ProcedureManagerRef,
register_loaders: bool,
#[cfg(feature = "enterprise")] trigger_ddl_manager: Option<TriggerDdlManagerRef>,
) -> Result<Self> {
let manager = Self {
ddl_context,
procedure_manager,
#[cfg(feature = "enterprise")]
trigger_ddl_manager,
};
if register_loaders {
manager.register_loaders()?;
@@ -669,6 +697,28 @@ async fn handle_create_flow_task(
})
}
#[cfg(feature = "enterprise")]
async fn handle_create_trigger_task(
ddl_manager: &DdlManager,
create_trigger_task: CreateTriggerTask,
query_context: QueryContext,
) -> Result<SubmitDdlTaskResponse> {
let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
return UnsupportedSnafu {
operation: "create trigger",
}
.fail();
};
m.create_trigger(
create_trigger_task,
ddl_manager.procedure_manager.clone(),
ddl_manager.ddl_context.clone(),
query_context,
)
.await
}
async fn handle_alter_logical_table_tasks(
ddl_manager: &DdlManager,
alter_table_tasks: Vec<AlterTableTask>,
@@ -777,6 +827,15 @@ impl ProcedureExecutor for DdlManager {
handle_create_flow_task(self, create_flow_task, request.query_context.into())
.await
}
#[cfg(feature = "enterprise")]
CreateTrigger(create_trigger_task) => {
handle_create_trigger_task(
self,
create_trigger_task,
request.query_context.into(),
)
.await
}
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
CreateView(create_view_task) => {
handle_create_view_task(self, create_view_task).await
@@ -905,6 +964,8 @@ mod tests {
},
procedure_manager.clone(),
true,
#[cfg(feature = "enterprise")]
None,
);
let expected_loaders = vec![

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(feature = "enterprise")]
pub mod trigger;
use std::collections::{HashMap, HashSet};
use std::result;
@@ -68,6 +71,8 @@ pub enum DdlTask {
DropFlow(DropFlowTask),
CreateView(CreateViewTask),
DropView(DropViewTask),
#[cfg(feature = "enterprise")]
CreateTrigger(trigger::CreateTriggerTask),
}
impl DdlTask {
@@ -242,6 +247,18 @@ impl TryFrom<Task> for DdlTask {
Task::DropFlowTask(drop_flow) => Ok(DdlTask::DropFlow(drop_flow.try_into()?)),
Task::CreateViewTask(create_view) => Ok(DdlTask::CreateView(create_view.try_into()?)),
Task::DropViewTask(drop_view) => Ok(DdlTask::DropView(drop_view.try_into()?)),
Task::CreateTriggerTask(create_trigger) => {
#[cfg(feature = "enterprise")]
return Ok(DdlTask::CreateTrigger(create_trigger.try_into()?));
#[cfg(not(feature = "enterprise"))]
{
let _ = create_trigger;
crate::error::UnsupportedSnafu {
operation: "create trigger",
}
.fail()
}
}
}
}
}
@@ -292,6 +309,8 @@ impl TryFrom<SubmitDdlTaskRequest> for PbDdlTaskRequest {
DdlTask::DropFlow(task) => Task::DropFlowTask(task.into()),
DdlTask::CreateView(task) => Task::CreateViewTask(task.try_into()?),
DdlTask::DropView(task) => Task::DropViewTask(task.into()),
#[cfg(feature = "enterprise")]
DdlTask::CreateTrigger(task) => Task::CreateTriggerTask(task.into()),
};
Ok(Self {

View File

@@ -0,0 +1,276 @@
use std::collections::HashMap;
use std::time::Duration;
use api::v1::meta::CreateTriggerTask as PbCreateTriggerTask;
use api::v1::notify_channel::ChannelType as PbChannelType;
use api::v1::{
CreateTriggerExpr, NotifyChannel as PbNotifyChannel, WebhookOptions as PbWebhookOptions,
};
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::error;
use crate::error::Result;
use crate::rpc::ddl::DdlTask;
// Create trigger
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreateTriggerTask {
pub catalog_name: String,
pub trigger_name: String,
pub if_not_exists: bool,
pub sql: String,
pub channels: Vec<NotifyChannel>,
pub labels: HashMap<String, String>,
pub annotations: HashMap<String, String>,
pub interval: Duration,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NotifyChannel {
pub name: String,
pub channel_type: ChannelType,
}
/// The available channel enum for sending trigger notifications.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ChannelType {
Webhook(WebhookOptions),
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WebhookOptions {
/// The URL of the AlertManager API endpoint.
///
/// e.g., "http://localhost:9093".
pub url: String,
/// Configuration options for the AlertManager webhook. e.g., timeout, etc.
pub opts: HashMap<String, String>,
}
impl From<CreateTriggerTask> for PbCreateTriggerTask {
fn from(task: CreateTriggerTask) -> Self {
let channels = task
.channels
.into_iter()
.map(PbNotifyChannel::from)
.collect();
let expr = CreateTriggerExpr {
catalog_name: task.catalog_name,
trigger_name: task.trigger_name,
create_if_not_exists: task.if_not_exists,
sql: task.sql,
channels,
labels: task.labels,
annotations: task.annotations,
interval: task.interval.as_secs(),
};
PbCreateTriggerTask {
create_trigger: Some(expr),
}
}
}
impl TryFrom<PbCreateTriggerTask> for CreateTriggerTask {
type Error = error::Error;
fn try_from(task: PbCreateTriggerTask) -> Result<Self> {
let expr = task.create_trigger.context(error::InvalidProtoMsgSnafu {
err_msg: "expected create_trigger",
})?;
let channels = expr
.channels
.into_iter()
.map(NotifyChannel::try_from)
.collect::<Result<Vec<_>>>()?;
let task = CreateTriggerTask {
catalog_name: expr.catalog_name,
trigger_name: expr.trigger_name,
if_not_exists: expr.create_if_not_exists,
sql: expr.sql,
channels,
labels: expr.labels,
annotations: expr.annotations,
interval: Duration::from_secs(expr.interval),
};
Ok(task)
}
}
impl From<NotifyChannel> for PbNotifyChannel {
fn from(channel: NotifyChannel) -> Self {
let NotifyChannel { name, channel_type } = channel;
let channel_type = match channel_type {
ChannelType::Webhook(options) => PbChannelType::Webhook(PbWebhookOptions {
url: options.url,
opts: options.opts,
}),
};
PbNotifyChannel {
name,
channel_type: Some(channel_type),
}
}
}
impl TryFrom<PbNotifyChannel> for NotifyChannel {
type Error = error::Error;
fn try_from(channel: PbNotifyChannel) -> Result<Self> {
let PbNotifyChannel { name, channel_type } = channel;
let channel_type = channel_type.context(error::InvalidProtoMsgSnafu {
err_msg: "expected channel_type",
})?;
let channel_type = match channel_type {
PbChannelType::Webhook(options) => ChannelType::Webhook(WebhookOptions {
url: options.url,
opts: options.opts,
}),
};
Ok(NotifyChannel { name, channel_type })
}
}
impl DdlTask {
/// Creates a [`DdlTask`] to create a trigger.
pub fn new_create_trigger(expr: CreateTriggerTask) -> Self {
DdlTask::CreateTrigger(expr)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_convert_create_trigger_task() {
let original = CreateTriggerTask {
catalog_name: "test_catalog".to_string(),
trigger_name: "test_trigger".to_string(),
if_not_exists: true,
sql: "SELECT * FROM test".to_string(),
channels: vec![
NotifyChannel {
name: "channel1".to_string(),
channel_type: ChannelType::Webhook(WebhookOptions {
url: "http://localhost:9093".to_string(),
opts: HashMap::from([("timeout".to_string(), "30s".to_string())]),
}),
},
NotifyChannel {
name: "channel2".to_string(),
channel_type: ChannelType::Webhook(WebhookOptions {
url: "http://alertmanager:9093".to_string(),
opts: HashMap::new(),
}),
},
],
labels: vec![
("key1".to_string(), "value1".to_string()),
("key2".to_string(), "value2".to_string()),
]
.into_iter()
.collect(),
annotations: vec![
("summary".to_string(), "Test alert".to_string()),
("description".to_string(), "This is a test".to_string()),
]
.into_iter()
.collect(),
interval: Duration::from_secs(60),
};
let pb_task: PbCreateTriggerTask = original.clone().into();
let expr = pb_task.create_trigger.as_ref().unwrap();
assert_eq!(expr.catalog_name, "test_catalog");
assert_eq!(expr.trigger_name, "test_trigger");
assert!(expr.create_if_not_exists);
assert_eq!(expr.sql, "SELECT * FROM test");
assert_eq!(expr.channels.len(), 2);
assert_eq!(expr.labels.len(), 2);
assert_eq!(expr.labels.get("key1").unwrap(), "value1");
assert_eq!(expr.labels.get("key2").unwrap(), "value2");
assert_eq!(expr.annotations.len(), 2);
assert_eq!(expr.annotations.get("summary").unwrap(), "Test alert");
assert_eq!(
expr.annotations.get("description").unwrap(),
"This is a test"
);
assert_eq!(expr.interval, 60);
let round_tripped = CreateTriggerTask::try_from(pb_task).unwrap();
assert_eq!(original.catalog_name, round_tripped.catalog_name);
assert_eq!(original.trigger_name, round_tripped.trigger_name);
assert_eq!(original.if_not_exists, round_tripped.if_not_exists);
assert_eq!(original.sql, round_tripped.sql);
assert_eq!(original.channels.len(), round_tripped.channels.len());
assert_eq!(&original.channels[0], &round_tripped.channels[0]);
assert_eq!(&original.channels[1], &round_tripped.channels[1]);
assert_eq!(original.labels, round_tripped.labels);
assert_eq!(original.annotations, round_tripped.annotations);
assert_eq!(original.interval, round_tripped.interval);
// Invalid, since create_trigger is None and it's required.
let invalid_task = PbCreateTriggerTask {
create_trigger: None,
};
let result = CreateTriggerTask::try_from(invalid_task);
assert!(result.is_err());
}
#[test]
fn test_convert_notify_channel() {
let original = NotifyChannel {
name: "test_channel".to_string(),
channel_type: ChannelType::Webhook(WebhookOptions {
url: "http://localhost:9093".to_string(),
opts: HashMap::new(),
}),
};
let pb_channel: PbNotifyChannel = original.clone().into();
match pb_channel.channel_type.as_ref().unwrap() {
PbChannelType::Webhook(options) => {
assert_eq!(pb_channel.name, "test_channel");
assert_eq!(options.url, "http://localhost:9093");
assert!(options.opts.is_empty());
}
}
let round_tripped = NotifyChannel::try_from(pb_channel).unwrap();
assert_eq!(original, round_tripped);
// Test with timeout is None.
let no_timeout = NotifyChannel {
name: "no_timeout".to_string(),
channel_type: ChannelType::Webhook(WebhookOptions {
url: "http://localhost:9093".to_string(),
opts: HashMap::new(),
}),
};
let pb_no_timeout: PbNotifyChannel = no_timeout.clone().into();
match pb_no_timeout.channel_type.as_ref().unwrap() {
PbChannelType::Webhook(options) => {
assert_eq!(options.url, "http://localhost:9093");
}
}
let round_tripped_no_timeout = NotifyChannel::try_from(pb_no_timeout).unwrap();
assert_eq!(no_timeout, round_tripped_no_timeout);
// Invalid, since channel_type is None and it's required.
let invalid_channel = PbNotifyChannel {
name: "invalid".to_string(),
channel_type: None,
};
let result = NotifyChannel::try_from(invalid_channel);
assert!(result.is_err());
}
}

View File

@@ -61,6 +61,7 @@ prost.workspace = true
query.workspace = true
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
servers.workspace = true
session.workspace = true
smallvec.workspace = true

View File

@@ -18,7 +18,7 @@ use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use api::v1::flow::{
flow_request, CreateRequest, DropRequest, FlowRequest, FlowResponse, FlushFlow,
flow_request, AdjustFlow, CreateRequest, DropRequest, FlowRequest, FlowResponse, FlushFlow,
};
use api::v1::region::InsertRequests;
use catalog::CatalogManager;
@@ -32,6 +32,7 @@ use common_telemetry::{error, info, trace, warn};
use datatypes::value::Value;
use futures::TryStreamExt;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use session::context::QueryContextBuilder;
use snafu::{ensure, IntoError, OptionExt, ResultExt};
use store_api::storage::{RegionId, TableId};
@@ -809,6 +810,25 @@ impl common_meta::node_manager::Flownode for FlowDualEngine {
..Default::default()
})
}
Some(flow_request::Body::Adjust(AdjustFlow { flow_id, options })) => {
#[derive(Debug, Serialize, Deserialize)]
struct Options {
min_run_interval_secs: u64,
max_filter_num_per_query: usize,
}
let options: Options = serde_json::from_str(&options).with_context(|_| {
common_meta::error::DeserializeFromJsonSnafu { input: options }
})?;
self.batching_engine
.adjust_flow(
flow_id.unwrap().id as u64,
options.min_run_interval_secs,
options.max_filter_num_per_query,
)
.await
.map_err(to_meta_err(snafu::location!()))?;
Ok(Default::default())
}
other => common_meta::error::InvalidFlowRequestBodySnafu { body: other }.fail(),
}
}
@@ -841,93 +861,6 @@ fn to_meta_err(
}
}
#[async_trait::async_trait]
impl common_meta::node_manager::Flownode for StreamingEngine {
async fn handle(&self, request: FlowRequest) -> MetaResult<FlowResponse> {
let query_ctx = request
.header
.and_then(|h| h.query_context)
.map(|ctx| ctx.into());
match request.body {
Some(flow_request::Body::Create(CreateRequest {
flow_id: Some(task_id),
source_table_ids,
sink_table_name: Some(sink_table_name),
create_if_not_exists,
expire_after,
comment,
sql,
flow_options,
or_replace,
})) => {
let source_table_ids = source_table_ids.into_iter().map(|id| id.id).collect_vec();
let sink_table_name = [
sink_table_name.catalog_name,
sink_table_name.schema_name,
sink_table_name.table_name,
];
let expire_after = expire_after.map(|e| e.value);
let args = CreateFlowArgs {
flow_id: task_id.id as u64,
sink_table_name,
source_table_ids,
create_if_not_exists,
or_replace,
expire_after,
comment: Some(comment),
sql: sql.clone(),
flow_options,
query_ctx,
};
let ret = self
.create_flow(args)
.await
.map_err(BoxedError::new)
.with_context(|_| CreateFlowSnafu { sql: sql.clone() })
.map_err(to_meta_err(snafu::location!()))?;
METRIC_FLOW_TASK_COUNT.inc();
Ok(FlowResponse {
affected_flows: ret
.map(|id| greptime_proto::v1::FlowId { id: id as u32 })
.into_iter()
.collect_vec(),
..Default::default()
})
}
Some(flow_request::Body::Drop(DropRequest {
flow_id: Some(flow_id),
})) => {
self.remove_flow(flow_id.id as u64)
.await
.map_err(to_meta_err(snafu::location!()))?;
METRIC_FLOW_TASK_COUNT.dec();
Ok(Default::default())
}
Some(flow_request::Body::Flush(FlushFlow {
flow_id: Some(flow_id),
})) => {
let row = self
.flush_flow_inner(flow_id.id as u64)
.await
.map_err(to_meta_err(snafu::location!()))?;
Ok(FlowResponse {
affected_flows: vec![flow_id],
affected_rows: row as u64,
..Default::default()
})
}
other => common_meta::error::InvalidFlowRequestBodySnafu { body: other }.fail(),
}
}
async fn handle_inserts(&self, request: InsertRequests) -> MetaResult<FlowResponse> {
self.handle_inserts_inner(request)
.await
.map(|_| Default::default())
.map_err(to_meta_err(snafu::location!()))
}
}
impl FlowEngine for StreamingEngine {
async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
self.create_flow_inner(args).await

View File

@@ -388,6 +388,20 @@ impl BatchingEngine {
pub async fn flow_exist_inner(&self, flow_id: FlowId) -> bool {
self.tasks.read().await.contains_key(&flow_id)
}
pub async fn adjust_flow(
&self,
flow_id: FlowId,
min_run_interval_secs: u64,
max_filter_num_per_query: usize,
) -> Result<(), Error> {
let task = self.tasks.read().await.get(&flow_id).cloned();
let task = task.with_context(|| FlowNotFoundSnafu { id: flow_id })?;
debug!("Adjusting flow {flow_id} with min_run_interval_secs={} and max_filter_num_per_query={}", min_run_interval_secs, max_filter_num_per_query);
task.adjust(min_run_interval_secs, max_filter_num_per_query);
Ok(())
}
}
impl FlowEngine for BatchingEngine {

View File

@@ -30,6 +30,10 @@ use crate::batching_mode::task::BatchingTask;
use crate::batching_mode::time_window::TimeWindowExpr;
use crate::batching_mode::MIN_REFRESH_DURATION;
use crate::error::{DatatypesSnafu, InternalSnafu, TimeSnafu, UnexpectedSnafu};
use crate::metrics::{
METRIC_FLOW_BATCHING_ENGINE_QUERY_TIME_RANGE, METRIC_FLOW_BATCHING_ENGINE_QUERY_WINDOW_CNT,
METRIC_FLOW_BATCHING_ENGINE_STALLED_QUERY_WINDOW_CNT,
};
use crate::{Error, FlowId};
/// The state of the [`BatchingTask`].
@@ -49,6 +53,11 @@ pub struct TaskState {
pub(crate) shutdown_rx: oneshot::Receiver<()>,
/// Task handle
pub(crate) task_handle: Option<tokio::task::JoinHandle<()>>,
/// min run interval in seconds
pub(crate) min_run_interval: Option<u64>,
/// max filter number per query
pub(crate) max_filter_num: Option<usize>,
}
impl TaskState {
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
@@ -60,6 +69,8 @@ impl TaskState {
exec_state: ExecState::Idle,
shutdown_rx,
task_handle: None,
min_run_interval: None,
max_filter_num: None,
}
}
@@ -84,20 +95,17 @@ impl TaskState {
pub fn get_next_start_query_time(
&self,
flow_id: FlowId,
time_window_size: &Option<Duration>,
_time_window_size: &Option<Duration>,
max_timeout: Option<Duration>,
) -> Instant {
let last_duration = max_timeout
let next_duration = max_timeout
.unwrap_or(self.last_query_duration)
.min(self.last_query_duration)
.max(MIN_REFRESH_DURATION);
let next_duration = time_window_size
.map(|t| {
let half = t / 2;
half.max(last_duration)
})
.unwrap_or(last_duration);
.max(
self.min_run_interval
.map(|s| Duration::from_secs(s))
.unwrap_or(MIN_REFRESH_DURATION),
);
// if there are dirty time windows, execute immediately to clean them
if self.dirty_time_windows.windows.is_empty() {
@@ -127,10 +135,10 @@ impl DirtyTimeWindows {
/// Time window merge distance
///
/// TODO(discord9): make those configurable
const MERGE_DIST: i32 = 3;
pub const MERGE_DIST: i32 = 3;
/// Maximum number of filters allowed in a single query
const MAX_FILTER_NUM: usize = 20;
pub const MAX_FILTER_NUM: usize = 20;
/// Add lower bounds to the dirty time windows. Upper bounds are ignored.
///
@@ -154,11 +162,16 @@ impl DirtyTimeWindows {
}
/// Generate all filter expressions consuming all time windows
///
/// There are two limits:
/// - the returned time range shouldn't be too long (<= `window_size * window_cnt`), so that the query can be executed in a reasonable time
/// - not too many time range exprs should be returned, so that the query can be parsed properly instead of overflowing the parser
pub fn gen_filter_exprs(
&mut self,
col_name: &str,
expire_lower_bound: Option<Timestamp>,
window_size: chrono::Duration,
window_cnt: usize,
flow_id: FlowId,
task_ctx: Option<&BatchingTask>,
) -> Result<Option<datafusion_expr::Expr>, Error> {
@@ -196,12 +209,33 @@ impl DirtyTimeWindows {
}
}
// get the first `MAX_FILTER_NUM` time windows
let nth = self
.windows
.iter()
.nth(Self::MAX_FILTER_NUM)
.map(|(key, _)| *key);
// get the first `window_cnt` time windows
let max_time_range = window_size * window_cnt as i32;
let nth = {
let mut cur_time_range = chrono::Duration::zero();
let mut nth_key = None;
for (idx, (start, end)) in self.windows.iter().enumerate() {
// if time range is too long, stop
if cur_time_range > max_time_range {
nth_key = Some(*start);
break;
}
// if we have enough time windows, stop
if idx >= window_cnt {
nth_key = Some(*start);
break;
}
if let Some(end) = end {
if let Some(x) = end.sub(start) {
cur_time_range += x;
}
}
}
nth_key
};
let first_nth = {
if let Some(nth) = nth {
let mut after = self.windows.split_off(&nth);
@@ -213,6 +247,28 @@ impl DirtyTimeWindows {
}
};
METRIC_FLOW_BATCHING_ENGINE_QUERY_WINDOW_CNT
.with_label_values(&[flow_id.to_string().as_str()])
.observe(first_nth.len() as f64);
METRIC_FLOW_BATCHING_ENGINE_STALLED_QUERY_WINDOW_CNT
.with_label_values(&[flow_id.to_string().as_str()])
.observe(self.windows.len() as f64);
let full_time_range = first_nth
.iter()
.fold(chrono::Duration::zero(), |acc, (start, end)| {
if let Some(end) = end {
acc + end.sub(start).unwrap_or(chrono::Duration::zero())
} else {
acc
}
})
.num_seconds() as f64;
METRIC_FLOW_BATCHING_ENGINE_QUERY_TIME_RANGE
.with_label_values(&[flow_id.to_string().as_str()])
.observe(full_time_range);
let mut expr_lst = vec![];
for (start, end) in first_nth.into_iter() {
// align using time window exprs
@@ -274,6 +330,8 @@ impl DirtyTimeWindows {
}
/// Merge time windows that overlap or get too close
///
/// TODO(discord9): not merge and prefer to send smaller time windows? how?
pub fn merge_dirty_time_windows(
&mut self,
window_size: chrono::Duration,
@@ -472,7 +530,14 @@ mod test {
.unwrap();
assert_eq!(expected, dirty.windows);
let filter_expr = dirty
.gen_filter_exprs("ts", expire_lower_bound, window_size, 0, None)
.gen_filter_exprs(
"ts",
expire_lower_bound,
window_size,
DirtyTimeWindows::MAX_FILTER_NUM,
0,
None,
)
.unwrap();
let unparser = datafusion::sql::unparser::Unparser::default();

View File
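The two limits described above (a bounded total time range and a bounded number of filter exprs) can be sketched standalone; Window and cap_windows below are illustrative stand-ins for DirtyTimeWindows, using std Duration instead of chrono.

use std::time::Duration;

// A dirty time window as (start, optional end), in seconds since some epoch.
type Window = (u64, Option<u64>);

/// Take at most `window_cnt` windows whose accumulated length stays within
/// `window_size * window_cnt`, leaving the rest for a later query.
fn cap_windows(
    windows: &[Window],
    window_size: Duration,
    window_cnt: usize,
) -> (Vec<Window>, Vec<Window>) {
    let max_range = window_size * window_cnt as u32;
    let mut total = Duration::ZERO;
    let mut split = windows.len();
    for (idx, (start, end)) in windows.iter().enumerate() {
        // Stop once either the total time range or the window count cap is reached.
        if total > max_range || idx >= window_cnt {
            split = idx;
            break;
        }
        if let Some(end) = end {
            total += Duration::from_secs(end.saturating_sub(*start));
        }
    }
    (windows[..split].to_vec(), windows[split..].to_vec())
}

fn main() {
    let windows: Vec<Window> = (0..6).map(|i| (i * 100, Some(i * 100 + 50))).collect();
    let (now, later) = cap_windows(&windows, Duration::from_secs(60), 3);
    // 3 windows of 50s each fit both the count cap (3) and the range cap (180s).
    assert_eq!(now.len(), 3);
    assert_eq!(later.len(), 3);
    println!("query now: {now:?}, stalled: {later:?}");
}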

@@ -46,7 +46,7 @@ use tokio::time::Instant;
use crate::adapter::{AUTO_CREATED_PLACEHOLDER_TS_COL, AUTO_CREATED_UPDATE_AT_TS_COL};
use crate::batching_mode::frontend_client::FrontendClient;
use crate::batching_mode::state::TaskState;
use crate::batching_mode::state::{DirtyTimeWindows, TaskState};
use crate::batching_mode::time_window::TimeWindowExpr;
use crate::batching_mode::utils::{
get_table_info_df_schema, sql_to_df_plan, AddAutoColumnRewriter, AddFilterRewriter,
@@ -144,6 +144,12 @@ impl BatchingTask {
})
}
pub fn adjust(&self, min_run_interval_secs: u64, max_filter_num_per_query: usize) {
let mut state = self.state.write().unwrap();
state.min_run_interval = Some(min_run_interval_secs);
state.max_filter_num = Some(max_filter_num_per_query);
}
/// Mark the time window range (now - expire_after, now) as dirty (or (0, now) if expire_after is not set)
///
/// Useful for flush_flow to flush the dirty time window range
@@ -387,7 +393,6 @@ impl BatchingTask {
METRIC_FLOW_BATCHING_ENGINE_SLOW_QUERY
.with_label_values(&[
flow_id.to_string().as_str(),
&plan.to_string(),
&peer_desc.unwrap_or_default().to_string(),
])
.observe(elapsed.as_secs_f64());
@@ -429,16 +434,23 @@ impl BatchingTask {
}
}
let mut new_query = None;
let mut gen_and_exec = async || {
new_query = self.gen_insert_plan(&engine).await?;
if let Some(new_query) = &new_query {
self.execute_logical_plan(&frontend_client, new_query).await
} else {
Ok(None)
let new_query = match self.gen_insert_plan(&engine).await {
Ok(new_query) => new_query,
Err(err) => {
common_telemetry::error!(err; "Failed to generate query for flow={}", self.config.flow_id);
// also sleep for a little while before trying again to prevent flooding the logs
tokio::time::sleep(MIN_REFRESH_DURATION).await;
continue;
}
};
match gen_and_exec().await {
let res = if let Some(new_query) = &new_query {
self.execute_logical_plan(&frontend_client, new_query).await
} else {
Ok(None)
};
match res {
// normal execution; sleep for some time before doing the next query
Ok(Some(_)) => {
let sleep_until = {
@@ -574,18 +586,20 @@ impl BatchingTask {
),
})?;
let expr = self
.state
.write()
.unwrap()
.dirty_time_windows
.gen_filter_exprs(
let expr = {
let mut state = self.state.write().unwrap();
let max_window_cnt = state
.max_filter_num
.unwrap_or(DirtyTimeWindows::MAX_FILTER_NUM);
state.dirty_time_windows.gen_filter_exprs(
&col_name,
Some(l),
window_size,
max_window_cnt,
self.config.flow_id,
Some(self),
)?;
)?
};
debug!(
"Flow id={:?}, Generated filter expr: {:?}",

View File
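The rewritten loop above turns a failed plan generation into log-sleep-retry instead of aborting; a minimal std-only sketch of that shape, with a hypothetical gen_plan and eprintln! standing in for common_telemetry::error!, is shown below.

use std::thread::sleep;
use std::time::Duration;

// Stands in for MIN_REFRESH_DURATION; shortened so the demo finishes quickly.
const RETRY_BACKOFF: Duration = Duration::from_millis(10);

// Hypothetical plan generator: fails on the first attempts, then succeeds.
fn gen_plan(attempt: u32) -> Result<String, String> {
    if attempt < 2 {
        Err("source table not ready".to_string())
    } else {
        Ok(format!("INSERT INTO sink SELECT ... /* attempt {attempt} */"))
    }
}

fn main() {
    let mut attempt = 0;
    let plan = loop {
        match gen_plan(attempt) {
            Ok(plan) => break plan,
            Err(err) => {
                // Log the failure and back off briefly before retrying, so the logs are not flooded.
                eprintln!("Failed to generate query: {err}");
                sleep(RETRY_BACKOFF);
                attempt += 1;
            }
        }
    };
    println!("executing: {plan}");
}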

@@ -15,9 +15,14 @@
//! Scalar expressions.
use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
use arrow::array::{make_array, ArrayData, ArrayRef};
use arrow::array::{make_array, ArrayData, ArrayRef, BooleanArray};
use arrow::buffer::BooleanBuffer;
use arrow::compute::or_kleene;
use common_error::ext::BoxedError;
use datafusion::physical_expr_common::datum::compare_with_eq;
use datafusion_common::DataFusionError;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::value::Value;
use datatypes::vectors::{BooleanVector, Helper, VectorRef};
@@ -92,6 +97,10 @@ pub enum ScalarExpr {
then: Box<ScalarExpr>,
els: Box<ScalarExpr>,
},
InList {
expr: Box<ScalarExpr>,
list: Vec<ScalarExpr>,
},
}
impl ScalarExpr {
@@ -137,6 +146,7 @@ impl ScalarExpr {
.context(crate::error::ExternalSnafu)?;
Ok(ColumnType::new_nullable(typ))
}
ScalarExpr::InList { expr, .. } => expr.typ(context),
}
}
}
@@ -222,9 +232,57 @@ impl ScalarExpr {
exprs,
} => df_scalar_fn.eval_batch(batch, exprs),
ScalarExpr::If { cond, then, els } => Self::eval_if_then(batch, cond, then, els),
ScalarExpr::InList { expr, list } => Self::eval_in_list(batch, expr, list),
}
}
fn eval_in_list(
batch: &Batch,
expr: &ScalarExpr,
list: &[ScalarExpr],
) -> Result<VectorRef, EvalError> {
let eval_list = list
.iter()
.map(|e| e.eval_batch(batch))
.collect::<Result<Vec<_>, _>>()?;
let eval_expr = expr.eval_batch(batch)?;
ensure!(
eval_list
.iter()
.all(|v| v.data_type() == eval_expr.data_type()),
TypeMismatchSnafu {
expected: eval_expr.data_type(),
actual: eval_list
.iter()
.find(|v| v.data_type() != eval_expr.data_type())
.map(|v| v.data_type())
.unwrap(),
}
);
let lhs = eval_expr.to_arrow_array();
let found = eval_list
.iter()
.map(|v| v.to_arrow_array())
.try_fold(
BooleanArray::new(BooleanBuffer::new_unset(batch.row_count()), None),
|result, in_list_elem| -> Result<BooleanArray, DataFusionError> {
let rhs = compare_with_eq(&lhs, &in_list_elem, false)?;
Ok(or_kleene(&result, &rhs)?)
},
)
.with_context(|_| crate::expr::error::DatafusionSnafu {
context: "Failed to compare eval_expr with eval_list",
})?;
let res = BooleanVector::from(found);
Ok(Arc::new(res))
}
/// NOTE: this if-then eval impl assumes all given exprs are pure and will not change the state of the world,
/// since it will evaluate both the then and else branches and filter the result
fn eval_if_then(
@@ -337,6 +395,15 @@ impl ScalarExpr {
df_scalar_fn,
exprs,
} => df_scalar_fn.eval(values, exprs),
ScalarExpr::InList { expr, list } => {
let eval_expr = expr.eval(values)?;
let eval_list = list
.iter()
.map(|v| v.eval(values))
.collect::<Result<Vec<_>, _>>()?;
let found = eval_list.iter().any(|item| *item == eval_expr);
Ok(Value::Boolean(found))
}
}
}
@@ -514,6 +581,13 @@ impl ScalarExpr {
}
Ok(())
}
ScalarExpr::InList { expr, list } => {
f(expr)?;
for item in list {
f(item)?;
}
Ok(())
}
}
}
@@ -558,6 +632,13 @@ impl ScalarExpr {
}
Ok(())
}
ScalarExpr::InList { expr, list } => {
f(expr)?;
for item in list {
f(item)?;
}
Ok(())
}
}
}
}

View File
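The batch path above folds per-element equality results with or_kleene; the three-valued (Kleene) semantics it relies on can be sketched with Option<bool> standing in for Arrow's nullable BooleanArray. The helpers below are illustrative only, not the Arrow kernels.

// Kleene OR over nullable booleans: NULL OR true = true, NULL OR false = NULL.
fn or_kleene(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    match (a, b) {
        (Some(true), _) | (_, Some(true)) => Some(true),
        (Some(false), Some(false)) => Some(false),
        _ => None,
    }
}

// Nullable equality: comparing against NULL yields NULL.
fn eq(lhs: Option<i64>, rhs: Option<i64>) -> Option<bool> {
    match (lhs, rhs) {
        (Some(l), Some(r)) => Some(l == r),
        _ => None,
    }
}

/// `expr IN (list)` per row: start from false and Kleene-OR the equality with each list element.
fn in_list(expr: &[Option<i64>], list: &[Option<i64>]) -> Vec<Option<bool>> {
    expr.iter()
        .map(|&value| {
            list.iter()
                .fold(Some(false), |acc, &item| or_kleene(acc, eq(value, item)))
        })
        .collect()
}

fn main() {
    let column = [Some(1), Some(5), None];
    let list = [Some(1), Some(2), Some(3)];
    // 1 is in the list, 5 is not, NULL stays NULL.
    assert_eq!(in_list(&column, &list), vec![Some(true), Some(false), None]);
}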

@@ -38,10 +38,34 @@ lazy_static! {
pub static ref METRIC_FLOW_BATCHING_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
"greptime_flow_batching_engine_slow_query_secs",
"flow batching engine slow query(seconds)",
&["flow_id", "sql", "peer"],
&["flow_id", "peer"],
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_BATCHING_ENGINE_STALLED_QUERY_WINDOW_CNT: HistogramVec =
register_histogram_vec!(
"greptime_flow_batching_engine_stalled_query_window_cnt",
"flow batching engine stalled query time window count",
&["flow_id"],
vec![0.0, 5., 10., 20., 40.]
)
.unwrap();
pub static ref METRIC_FLOW_BATCHING_ENGINE_QUERY_WINDOW_CNT: HistogramVec =
register_histogram_vec!(
"greptime_flow_batching_engine_query_window_cnt",
"flow batching engine query time window count",
&["flow_id"],
vec![0.0, 5., 10., 20., 40.]
)
.unwrap();
pub static ref METRIC_FLOW_BATCHING_ENGINE_QUERY_TIME_RANGE: HistogramVec =
register_histogram_vec!(
"greptime_flow_batching_engine_query_time_range_secs",
"flow batching engine query time range(seconds)",
&["flow_id"],
vec![60., 4. * 60., 16. * 60., 64. * 60., 256. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -596,7 +596,7 @@ impl FrontendInvoker {
.start_timer();
self.inserter
.handle_row_inserts(requests, ctx, &self.statement_executor, false)
.handle_row_inserts(requests, ctx, &self.statement_executor, false, false)
.await
.map_err(BoxedError::new)
.context(common_frontend::error::ExternalSnafu)

View File

@@ -476,11 +476,27 @@ impl TypedExpr {
let substrait_expr = s.value.as_ref().with_context(|| InvalidQuerySnafu {
reason: "SingularOrList expression without value",
})?;
let typed_expr =
TypedExpr::from_substrait_rex(substrait_expr, input_schema, extensions).await?;
// Note that we didn't impl support to in list expr
if !s.options.is_empty() {
return not_impl_err!("In list expression is not supported");
let mut list = Vec::with_capacity(s.options.len());
for opt in s.options.iter() {
let opt_expr =
TypedExpr::from_substrait_rex(opt, input_schema, extensions).await?;
list.push(opt_expr.expr);
}
let in_list_expr = ScalarExpr::InList {
expr: Box::new(typed_expr.expr),
list,
};
Ok(TypedExpr::new(
in_list_expr,
ColumnType::new_nullable(CDT::boolean_datatype()),
))
} else {
Ok(typed_expr)
}
TypedExpr::from_substrait_rex(substrait_expr, input_schema, extensions).await
}
Some(RexType::Selection(field_ref)) => match &field_ref.reference_type {
Some(DirectReference(direct)) => match &direct.reference_type.as_ref() {

View File

@@ -6,7 +6,7 @@ license.workspace = true
[features]
testing = []
enterprise = ["operator/enterprise", "sql/enterprise"]
enterprise = ["common-meta/enterprise", "operator/enterprise", "sql/enterprise"]
[lints]
workspace = true

View File

@@ -76,7 +76,7 @@ impl GrpcQueryHandler for Instance {
let output = match request {
Request::Inserts(requests) => self.handle_inserts(requests, ctx.clone()).await?,
Request::RowInserts(requests) => {
self.handle_row_inserts(requests, ctx.clone(), false)
self.handle_row_inserts(requests, ctx.clone(), false, false)
.await?
}
Request::Deletes(requests) => self.handle_deletes(requests, ctx.clone()).await?,
@@ -420,6 +420,7 @@ impl Instance {
requests: RowInsertRequests,
ctx: QueryContextRef,
accommodate_existing_schema: bool,
is_single_value: bool,
) -> Result<Output> {
self.inserter
.handle_row_inserts(
@@ -427,6 +428,7 @@ impl Instance {
ctx,
self.statement_executor.as_ref(),
accommodate_existing_schema,
is_single_value,
)
.await
.context(TableOperationSnafu)
@@ -439,7 +441,14 @@ impl Instance {
ctx: QueryContextRef,
) -> Result<Output> {
self.inserter
.handle_last_non_null_inserts(requests, ctx, self.statement_executor.as_ref(), true)
.handle_last_non_null_inserts(
requests,
ctx,
self.statement_executor.as_ref(),
true,
// The Influx protocol may write multiple fields (values).
false,
)
.await
.context(TableOperationSnafu)
}

View File

@@ -52,8 +52,9 @@ impl OpentsdbProtocolHandler for Instance {
None
};
// OpenTSDB is single value.
let output = self
.handle_row_inserts(requests, ctx, true)
.handle_row_inserts(requests, ctx, true, true)
.await
.map_err(BoxedError::new)
.context(servers::error::ExecuteGrpcQuerySnafu)?;

View File

@@ -63,7 +63,7 @@ impl OpenTelemetryProtocolHandler for Instance {
None
};
self.handle_row_inserts(requests, ctx, false)
self.handle_row_inserts(requests, ctx, false, false)
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)
@@ -125,7 +125,7 @@ impl OpenTelemetryProtocolHandler for Instance {
pipeline_params: GreptimePipelineParams,
table_name: String,
ctx: QueryContextRef,
) -> ServerResult<Output> {
) -> ServerResult<Vec<Output>> {
self.plugins
.get::<PermissionCheckerRef>()
.as_ref()
@@ -137,7 +137,7 @@ impl OpenTelemetryProtocolHandler for Instance {
.get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
interceptor_ref.pre_execute(ctx.clone())?;
let (requests, rows) = otlp::logs::to_grpc_insert_requests(
let opt_req = otlp::logs::to_grpc_insert_requests(
request,
pipeline,
pipeline_params,
@@ -148,7 +148,7 @@ impl OpenTelemetryProtocolHandler for Instance {
.await?;
let _guard = if let Some(limiter) = &self.limiter {
let result = limiter.limit_row_inserts(&requests);
let result = limiter.limit_ctx_req(&opt_req);
if result.is_none() {
return InFlightWriteBytesExceededSnafu.fail();
}
@@ -157,10 +157,24 @@ impl OpenTelemetryProtocolHandler for Instance {
None
};
self.handle_log_inserts(requests, ctx)
.await
.inspect(|_| OTLP_LOGS_ROWS.inc_by(rows as u64))
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)
let mut outputs = vec![];
for (temp_ctx, requests) in opt_req.as_req_iter(ctx) {
let cnt = requests
.inserts
.iter()
.filter_map(|r| r.rows.as_ref().map(|r| r.rows.len()))
.sum::<usize>();
let o = self
.handle_log_inserts(requests, temp_ctx)
.await
.inspect(|_| OTLP_LOGS_ROWS.inc_by(cnt as u64))
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?;
outputs.push(o);
}
Ok(outputs)
}
}

View File
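The new handler iterates per-context request groups and counts rows per group before inserting; the sketch below shows that split-and-count shape with plain structs standing in for ContextReq and the gRPC row types (the real as_req_iter also derives a per-group query context, which is omitted here).

use std::collections::BTreeMap;

// Simplified stand-ins for the per-context grouped insert requests.
#[derive(Debug)]
struct Rows(Vec<&'static str>);
#[derive(Debug)]
struct RowInsertRequests {
    inserts: Vec<Rows>,
}

/// Requests grouped by pipeline hints (here just a string key per group).
struct ContextReq {
    groups: BTreeMap<String, RowInsertRequests>,
}

impl ContextReq {
    fn as_req_iter(self) -> impl Iterator<Item = (String, RowInsertRequests)> {
        self.groups.into_iter()
    }
}

fn main() {
    let req = ContextReq {
        groups: BTreeMap::from([
            ("db=public".to_string(), RowInsertRequests { inserts: vec![Rows(vec!["a", "b"])] }),
            ("db=logs".to_string(), RowInsertRequests { inserts: vec![Rows(vec!["c"])] }),
        ]),
    };
    let mut outputs = vec![];
    for (ctx, requests) in req.as_req_iter() {
        // Count rows in this group, then "insert" them under the per-group context.
        let cnt: usize = requests.inserts.iter().map(|r| r.0.len()).sum();
        outputs.push(format!("{ctx}: inserted {cnt} rows"));
    }
    assert_eq!(outputs.len(), 2);
    println!("{outputs:?}");
}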

@@ -195,7 +195,7 @@ impl PromStoreProtocolHandler for Instance {
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?
} else {
self.handle_row_inserts(request, ctx.clone(), true)
self.handle_row_inserts(request, ctx.clone(), true, true)
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)?

View File

@@ -18,8 +18,11 @@ use std::sync::Arc;
use api::v1::column::Values;
use api::v1::greptime_request::Request;
use api::v1::value::ValueData;
use api::v1::{Decimal128, InsertRequests, IntervalMonthDayNano, RowInsertRequests};
use api::v1::{
Decimal128, InsertRequests, IntervalMonthDayNano, RowInsertRequest, RowInsertRequests,
};
use common_telemetry::{debug, warn};
use pipeline::ContextReq;
pub(crate) type LimiterRef = Arc<Limiter>;
@@ -75,7 +78,9 @@ impl Limiter {
pub fn limit_request(&self, request: &Request) -> Option<InFlightWriteBytesCounter> {
let size = match request {
Request::Inserts(requests) => self.insert_requests_data_size(requests),
Request::RowInserts(requests) => self.rows_insert_requests_data_size(requests),
Request::RowInserts(requests) => {
self.rows_insert_requests_data_size(requests.inserts.iter())
}
_ => 0,
};
self.limit_in_flight_write_bytes(size as u64)
@@ -85,7 +90,12 @@ impl Limiter {
&self,
requests: &RowInsertRequests,
) -> Option<InFlightWriteBytesCounter> {
let size = self.rows_insert_requests_data_size(requests);
let size = self.rows_insert_requests_data_size(requests.inserts.iter());
self.limit_in_flight_write_bytes(size as u64)
}
pub fn limit_ctx_req(&self, opt_req: &ContextReq) -> Option<InFlightWriteBytesCounter> {
let size = self.rows_insert_requests_data_size(opt_req.ref_all_req());
self.limit_in_flight_write_bytes(size as u64)
}
@@ -137,9 +147,12 @@ impl Limiter {
size
}
fn rows_insert_requests_data_size(&self, request: &RowInsertRequests) -> usize {
fn rows_insert_requests_data_size<'a>(
&self,
inserts: impl Iterator<Item = &'a RowInsertRequest>,
) -> usize {
let mut size: usize = 0;
for insert in &request.inserts {
for insert in inserts {
if let Some(rows) = &insert.rows {
for row in &rows.rows {
for value in &row.values {

View File
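The limiter refactor above generalizes the size computation to any iterator of row-insert requests; the sketch below mirrors that shape with simplified stand-ins for the proto types, sizing rows by string length only.

// Simplified stand-ins for the proto types; the real sizing walks every value variant.
struct Row {
    values: Vec<String>,
}
struct Rows {
    rows: Vec<Row>,
}
struct RowInsertRequest {
    rows: Option<Rows>,
}

/// Sums an approximate payload size over any iterator of requests,
/// so both full RowInsertRequests and per-context groups can reuse it.
fn rows_insert_requests_data_size<'a>(
    inserts: impl Iterator<Item = &'a RowInsertRequest>,
) -> usize {
    let mut size = 0;
    for insert in inserts {
        if let Some(rows) = &insert.rows {
            for row in &rows.rows {
                size += row.values.iter().map(|v| v.len()).sum::<usize>();
            }
        }
    }
    size
}

fn main() {
    let reqs = vec![
        RowInsertRequest {
            rows: Some(Rows { rows: vec![Row { values: vec!["abc".into(), "de".into()] }] }),
        },
        RowInsertRequest { rows: None },
    ];
    assert_eq!(rows_insert_requests_data_size(reqs.iter()), 5);
    println!("in-flight write bytes: {}", rows_insert_requests_data_size(reqs.iter()));
}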

@@ -233,7 +233,7 @@ impl SlowQueryEventHandler {
.into();
self.inserter
.handle_row_inserts(requests, query_ctx, &self.statement_executor, false)
.handle_row_inserts(requests, query_ctx, &self.statement_executor, false, false)
.await
.context(TableOperationSnafu)?;

View File

@@ -169,7 +169,6 @@ fn convert_to_naive_entry(provider: Arc<KafkaProvider>, record: Record) -> Entry
Entry::Naive(NaiveEntry {
provider: Provider::Kafka(provider),
region_id,
// TODO(weny): should be the offset in the topic
entry_id: record.meta.entry_id,
data: record.data,
})
@@ -182,6 +181,7 @@ fn convert_to_multiple_entry(
) -> Entry {
let mut headers = Vec::with_capacity(records.len());
let mut parts = Vec::with_capacity(records.len());
let entry_id = records.last().map(|r| r.meta.entry_id).unwrap_or_default();
for record in records {
let header = match record.meta.tp {
@@ -197,8 +197,7 @@ fn convert_to_multiple_entry(
Entry::MultiplePart(MultiplePartEntry {
provider: Provider::Kafka(provider),
region_id,
// TODO(weny): should be the offset in the topic
entry_id: 0,
entry_id,
headers,
parts,
})
@@ -369,8 +368,7 @@ mod tests {
Entry::MultiplePart(MultiplePartEntry {
provider: Provider::Kafka(provider.clone()),
region_id,
// TODO(weny): always be 0.
entry_id: 0,
entry_id: 1,
headers: vec![MultiplePartHeader::First],
parts: vec![vec![1; 100]],
})
@@ -388,8 +386,7 @@ mod tests {
Entry::MultiplePart(MultiplePartEntry {
provider: Provider::Kafka(provider.clone()),
region_id,
// TODO(weny): always be 0.
entry_id: 0,
entry_id: 1,
headers: vec![MultiplePartHeader::Last],
parts: vec![vec![1; 100]],
})
@@ -411,8 +408,7 @@ mod tests {
Entry::MultiplePart(MultiplePartEntry {
provider: Provider::Kafka(provider),
region_id,
// TODO(weny): always be 0.
entry_id: 0,
entry_id: 1,
headers: vec![MultiplePartHeader::Middle(0)],
parts: vec![vec![1; 100]],
})

View File

@@ -9,6 +9,7 @@ mock = []
pg_kvbackend = ["dep:tokio-postgres", "common-meta/pg_kvbackend", "dep:deadpool-postgres", "dep:deadpool"]
mysql_kvbackend = ["dep:sqlx", "common-meta/mysql_kvbackend"]
testing = ["common-wal/testing"]
enterprise = ["common-meta/enterprise"]
[lints]
workspace = true

View File

@@ -61,9 +61,9 @@ use tonic::transport::server::{Router, TcpIncoming};
use crate::election::etcd::EtcdElection;
#[cfg(feature = "mysql_kvbackend")]
use crate::election::mysql::MySqlElection;
use crate::election::rds::mysql::MySqlElection;
#[cfg(feature = "pg_kvbackend")]
use crate::election::postgres::PgElection;
use crate::election::rds::postgres::PgElection;
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
use crate::election::CANDIDATE_LEASE_SECS;
use crate::metasrv::builder::MetasrvBuilder;

View File

@@ -13,15 +13,14 @@
// limitations under the License.
pub mod etcd;
#[cfg(feature = "mysql_kvbackend")]
pub mod mysql;
#[cfg(feature = "pg_kvbackend")]
pub mod postgres;
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
pub mod rds;
use std::fmt::{self, Debug};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use common_telemetry::{info, warn};
use common_telemetry::{error, info, warn};
use tokio::sync::broadcast::error::RecvError;
use tokio::sync::broadcast::{self, Receiver, Sender};
@@ -110,6 +109,28 @@ fn listen_leader_change(leader_value: String) -> Sender<LeaderChangeMessage> {
tx
}
/// Sends a leader change message to the channel and sets the `is_leader` flag.
/// If a leader is elected, it will also set the `leader_infancy` flag to true.
fn send_leader_change_and_set_flags(
is_leader: &AtomicBool,
leader_infancy: &AtomicBool,
tx: &Sender<LeaderChangeMessage>,
msg: LeaderChangeMessage,
) {
let is_elected = matches!(msg, LeaderChangeMessage::Elected(_));
if is_leader
.compare_exchange(!is_elected, is_elected, Ordering::AcqRel, Ordering::Acquire)
.is_ok()
{
if is_elected {
leader_infancy.store(true, Ordering::Release);
}
if let Err(e) = tx.send(msg) {
error!(e; "Failed to send leader change message");
}
}
}
#[async_trait::async_trait]
pub trait Election: Send + Sync {
type Leader;

View File
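The helper above only notifies watchers on a real leader-state transition, which hinges on compare_exchange; the standalone sketch below shows that idempotence with a plain mpsc channel in place of the broadcast sender.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc;

#[derive(Debug)]
enum LeaderChange {
    Elected,
    StepDown,
}

fn send_leader_change(is_leader: &AtomicBool, tx: &mpsc::Sender<LeaderChange>, msg: LeaderChange) {
    let elected = matches!(msg, LeaderChange::Elected);
    // Only flip the flag (and notify) when the state actually changes.
    if is_leader
        .compare_exchange(!elected, elected, Ordering::AcqRel, Ordering::Acquire)
        .is_ok()
    {
        let _ = tx.send(msg);
    }
}

fn main() {
    let is_leader = AtomicBool::new(false);
    let (tx, rx) = mpsc::channel();

    send_leader_change(&is_leader, &tx, LeaderChange::Elected);
    // A repeated "elected" while already leader is a no-op: no duplicate message.
    send_leader_change(&is_leader, &tx, LeaderChange::Elected);
    send_leader_change(&is_leader, &tx, LeaderChange::StepDown);

    let msgs: Vec<_> = rx.try_iter().collect();
    assert_eq!(msgs.len(), 2);
    println!("{msgs:?}");
}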

@@ -27,8 +27,8 @@ use tokio::sync::broadcast::Receiver;
use tokio::time::{timeout, MissedTickBehavior};
use crate::election::{
listen_leader_change, Election, LeaderChangeMessage, LeaderKey, CANDIDATES_ROOT,
CANDIDATE_LEASE_SECS, ELECTION_KEY, KEEP_ALIVE_INTERVAL_SECS,
listen_leader_change, send_leader_change_and_set_flags, Election, LeaderChangeMessage,
LeaderKey, CANDIDATES_ROOT, CANDIDATE_LEASE_SECS, ELECTION_KEY, KEEP_ALIVE_INTERVAL_SECS,
};
use crate::error;
use crate::error::Result;
@@ -247,18 +247,12 @@ impl Election for EtcdElection {
}
}
if self
.is_leader
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_ok()
{
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::StepDown(Arc::new(leader.clone())))
{
error!(e; "Failed to send leader change message");
}
}
send_leader_change_and_set_flags(
&self.is_leader,
&self.infancy,
&self.leader_watcher,
LeaderChangeMessage::StepDown(Arc::new(leader.clone())),
);
}
Ok(())
@@ -305,20 +299,12 @@ impl EtcdElection {
);
// Only after a successful `keep_alive` is the leader considered official.
if self
.is_leader
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
.is_ok()
{
self.infancy.store(true, Ordering::Release);
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::Elected(Arc::new(leader)))
{
error!(e; "Failed to send leader change message");
}
}
send_leader_change_and_set_flags(
&self.is_leader,
&self.infancy,
&self.leader_watcher,
LeaderChangeMessage::Elected(Arc::new(leader.clone())),
);
}
Ok(())

View File

@@ -0,0 +1,90 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(feature = "mysql_kvbackend")]
pub mod mysql;
#[cfg(feature = "pg_kvbackend")]
pub mod postgres;
use common_time::Timestamp;
use itertools::Itertools;
use snafu::OptionExt;
use crate::election::LeaderKey;
use crate::error::{Result, UnexpectedSnafu};
// Separator between value and expire time in the lease string.
// A lease is put into rds election in the format:
// <node_info> || __metadata_lease_sep || <expire_time>
const LEASE_SEP: &str = r#"||__metadata_lease_sep||"#;
/// Parses the value and expire time from the given string retrieved from rds.
fn parse_value_and_expire_time(value: &str) -> Result<(String, Timestamp)> {
let (value, expire_time) =
value
.split(LEASE_SEP)
.collect_tuple()
.with_context(|| UnexpectedSnafu {
violated: format!(
"Invalid value {}, expect node info || {} || expire time",
value, LEASE_SEP
),
})?;
// Given expire_time is in the format 'YYYY-MM-DD HH24:MI:SS.MS'
let expire_time = match Timestamp::from_str(expire_time, None) {
Ok(ts) => ts,
Err(_) => UnexpectedSnafu {
violated: format!("Invalid timestamp: {}", expire_time),
}
.fail()?,
};
Ok((value.to_string(), expire_time))
}
/// LeaderKey used for [LeaderChangeMessage] in rds election components.
#[derive(Debug, Clone, Default)]
struct RdsLeaderKey {
name: Vec<u8>,
key: Vec<u8>,
rev: i64,
lease: i64,
}
impl LeaderKey for RdsLeaderKey {
fn name(&self) -> &[u8] {
&self.name
}
fn key(&self) -> &[u8] {
&self.key
}
fn revision(&self) -> i64 {
self.rev
}
fn lease_id(&self) -> i64 {
self.lease
}
}
/// Lease information for rds election.
#[derive(Default, Clone, Debug)]
struct Lease {
leader_value: String,
expire_time: Timestamp,
current: Timestamp,
// `origin` is the original value of the lease, used for CAS.
origin: String,
}

View File
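The lease value stored in the RDS backend is simply <node_info> || LEASE_SEP || <expire_time>; a std-only round-trip sketch is shown below, keeping the expire time as a string where the real code parses it into a common_time::Timestamp.

const LEASE_SEP: &str = r#"||__metadata_lease_sep||"#;

fn encode_lease(node_info: &str, expire_time: &str) -> String {
    format!("{node_info}{LEASE_SEP}{expire_time}")
}

/// Splits a stored lease back into (node_info, expire_time); exactly one separator is expected.
fn parse_lease(value: &str) -> Result<(&str, &str), String> {
    let mut parts = value.split(LEASE_SEP);
    match (parts.next(), parts.next(), parts.next()) {
        (Some(node), Some(expire), None) => Ok((node, expire)),
        _ => Err(format!("invalid lease value: {value}")),
    }
}

fn main() {
    let stored = encode_lease(r#"{"addr":"127.0.0.1:3002"}"#, "2025-06-05 13:52:08.000");
    let (node, expire) = parse_lease(&stored).unwrap();
    assert_eq!(node, r#"{"addr":"127.0.0.1:3002"}"#);
    assert_eq!(expire, "2025-06-05 13:52:08.000");
    // A value without the separator is rejected, mirroring the UnexpectedSnafu path.
    assert!(parse_lease("garbage").is_err());
}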

@@ -16,9 +16,8 @@ use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use common_telemetry::{error, warn};
use common_telemetry::warn;
use common_time::Timestamp;
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use sqlx::mysql::{MySqlArguments, MySqlRow};
use sqlx::query::Query;
@@ -26,8 +25,10 @@ use sqlx::{MySql, MySqlConnection, MySqlTransaction, Row};
use tokio::sync::{broadcast, Mutex, MutexGuard};
use tokio::time::MissedTickBehavior;
use crate::election::rds::{parse_value_and_expire_time, Lease, RdsLeaderKey, LEASE_SEP};
use crate::election::{
listen_leader_change, Election, LeaderChangeMessage, LeaderKey, CANDIDATES_ROOT, ELECTION_KEY,
listen_leader_change, send_leader_change_and_set_flags, Election, LeaderChangeMessage,
CANDIDATES_ROOT, ELECTION_KEY,
};
use crate::error::{
DeserializeFromJsonSnafu, MySqlExecutionSnafu, NoLeaderSnafu, Result, SerializeToJsonSnafu,
@@ -35,20 +36,6 @@ use crate::error::{
};
use crate::metasrv::{ElectionRef, LeaderValue, MetasrvNodeInfo};
// Separator between value and expire time.
const LEASE_SEP: &str = r#"||__metadata_lease_sep||"#;
/// Lease information.
/// TODO(CookiePie): PgElection can also use this struct. Refactor it to a common module.
#[derive(Default, Clone, Debug)]
struct Lease {
leader_value: String,
expire_time: Timestamp,
current: Timestamp,
// origin is the origin value of the lease, used for CAS.
origin: String,
}
struct ElectionSqlFactory<'a> {
table_name: &'a str,
meta_lease_ttl_secs: u64,
@@ -204,55 +191,6 @@ impl<'a> ElectionSqlFactory<'a> {
}
}
/// Parse the value and expire time from the given string. The value should be in the format "value || LEASE_SEP || expire_time".
fn parse_value_and_expire_time(value: &str) -> Result<(String, Timestamp)> {
let (value, expire_time) =
value
.split(LEASE_SEP)
.collect_tuple()
.with_context(|| UnexpectedSnafu {
violated: format!(
"Invalid value {}, expect node info || {} || expire time",
value, LEASE_SEP
),
})?;
// Given expire_time is in the format 'YYYY-MM-DD HH24:MI:SS.MS'
let expire_time = match Timestamp::from_str(expire_time, None) {
Ok(ts) => ts,
Err(_) => UnexpectedSnafu {
violated: format!("Invalid timestamp: {}", expire_time),
}
.fail()?,
};
Ok((value.to_string(), expire_time))
}
#[derive(Debug, Clone, Default)]
struct MySqlLeaderKey {
name: Vec<u8>,
key: Vec<u8>,
rev: i64,
lease: i64,
}
impl LeaderKey for MySqlLeaderKey {
fn name(&self) -> &[u8] {
&self.name
}
fn key(&self) -> &[u8] {
&self.key
}
fn revision(&self) -> i64 {
self.rev
}
fn lease_id(&self) -> i64 {
self.lease
}
}
enum Executor<'a> {
Default(MutexGuard<'a, MySqlConnection>),
Txn(MySqlTransaction<'a>),
@@ -767,23 +705,17 @@ impl MySqlElection {
/// Still consider itself as the leader locally but failed to acquire the lock. Step down without deleting the key.
async fn step_down_without_lock(&self) -> Result<()> {
let key = self.election_key().into_bytes();
let leader_key = MySqlLeaderKey {
let leader_key = RdsLeaderKey {
name: self.leader_value.clone().into_bytes(),
key: key.clone(),
..Default::default()
};
if self
.is_leader
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_ok()
{
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::StepDown(Arc::new(leader_key)))
{
error!(e; "Failed to send leader change message");
}
}
send_leader_change_and_set_flags(
&self.is_leader,
&self.leader_infancy,
&self.leader_watcher,
LeaderChangeMessage::StepDown(Arc::new(leader_key)),
);
Ok(())
}
@@ -791,7 +723,7 @@ impl MySqlElection {
/// Caution: Should only elected while holding the lock.
async fn elected(&self, executor: &mut Executor<'_>) -> Result<()> {
let key = self.election_key();
let leader_key = MySqlLeaderKey {
let leader_key = RdsLeaderKey {
name: self.leader_value.clone().into_bytes(),
key: key.clone().into_bytes(),
..Default::default()
@@ -800,20 +732,12 @@ impl MySqlElection {
self.put_value_with_lease(&key, &self.leader_value, self.meta_lease_ttl_secs, executor)
.await?;
if self
.is_leader
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
.is_ok()
{
self.leader_infancy.store(true, Ordering::Release);
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::Elected(Arc::new(leader_key)))
{
error!(e; "Failed to send leader change message");
}
}
send_leader_change_and_set_flags(
&self.is_leader,
&self.leader_infancy,
&self.leader_watcher,
LeaderChangeMessage::Elected(Arc::new(leader_key)),
);
Ok(())
}

View File

@@ -18,15 +18,16 @@ use std::time::Duration;
use common_telemetry::{error, warn};
use common_time::Timestamp;
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use tokio::sync::broadcast;
use tokio::time::MissedTickBehavior;
use tokio_postgres::types::ToSql;
use tokio_postgres::Client;
use crate::election::rds::{parse_value_and_expire_time, Lease, RdsLeaderKey, LEASE_SEP};
use crate::election::{
listen_leader_change, Election, LeaderChangeMessage, LeaderKey, CANDIDATES_ROOT, ELECTION_KEY,
listen_leader_change, send_leader_change_and_set_flags, Election, LeaderChangeMessage,
CANDIDATES_ROOT, ELECTION_KEY,
};
use crate::error::{
DeserializeFromJsonSnafu, NoLeaderSnafu, PostgresExecutionSnafu, Result, SerializeToJsonSnafu,
@@ -34,9 +35,6 @@ use crate::error::{
};
use crate::metasrv::{ElectionRef, LeaderValue, MetasrvNodeInfo};
// Separator between value and expire time.
const LEASE_SEP: &str = r#"||__metadata_lease_sep||"#;
struct ElectionSqlFactory<'a> {
lock_id: u64,
table_name: &'a str,
@@ -173,54 +171,6 @@ impl<'a> ElectionSqlFactory<'a> {
}
}
/// Parse the value and expire time from the given string. The value should be in the format "value || LEASE_SEP || expire_time".
fn parse_value_and_expire_time(value: &str) -> Result<(String, Timestamp)> {
let (value, expire_time) = value
.split(LEASE_SEP)
.collect_tuple()
.context(UnexpectedSnafu {
violated: format!(
"Invalid value {}, expect node info || {} || expire time",
value, LEASE_SEP
),
})?;
// Given expire_time is in the format 'YYYY-MM-DD HH24:MI:SS.MS'
let expire_time = match Timestamp::from_str(expire_time, None) {
Ok(ts) => ts,
Err(_) => UnexpectedSnafu {
violated: format!("Invalid timestamp: {}", expire_time),
}
.fail()?,
};
Ok((value.to_string(), expire_time))
}
#[derive(Debug, Clone, Default)]
struct PgLeaderKey {
name: Vec<u8>,
key: Vec<u8>,
rev: i64,
lease: i64,
}
impl LeaderKey for PgLeaderKey {
fn name(&self) -> &[u8] {
&self.name
}
fn key(&self) -> &[u8] {
&self.key
}
fn revision(&self) -> i64 {
self.rev
}
fn lease_id(&self) -> i64 {
self.lease
}
}
/// PostgreSql implementation of Election.
pub struct PgElection {
leader_value: String,
@@ -314,27 +264,31 @@ impl Election for PgElection {
loop {
let _ = keep_alive_interval.tick().await;
let (_, prev_expire_time, current_time, origin) = self
.get_value_with_lease(&key, true)
let lease = self
.get_value_with_lease(&key)
.await?
.unwrap_or_default();
.context(UnexpectedSnafu {
violated: format!("Failed to get lease for key: {:?}", key),
})?;
ensure!(
prev_expire_time > current_time,
lease.expire_time > lease.current,
UnexpectedSnafu {
violated: format!(
"Candidate lease expired at {:?} (current time {:?}), key: {:?}",
prev_expire_time,
current_time,
String::from_utf8_lossy(&key.into_bytes())
lease.expire_time, lease.current, key
),
}
);
// Safety: origin is Some since we are using `get_value_with_lease` with `true`.
let origin = origin.unwrap();
self.update_value_with_lease(&key, &origin, &node_info, self.candidate_lease_ttl_secs)
.await?;
self.update_value_with_lease(
&key,
&lease.origin,
&node_info,
self.candidate_lease_ttl_secs,
)
.await?;
}
}
@@ -400,11 +354,9 @@ impl Election for PgElection {
Ok(self.leader_value.as_bytes().into())
} else {
let key = self.election_key();
if let Some((leader, expire_time, current, _)) =
self.get_value_with_lease(&key, false).await?
{
ensure!(expire_time > current, NoLeaderSnafu);
Ok(leader.as_bytes().into())
if let Some(lease) = self.get_value_with_lease(&key).await? {
ensure!(lease.expire_time > lease.current, NoLeaderSnafu);
Ok(lease.leader_value.as_bytes().into())
} else {
NoLeaderSnafu.fail()
}
@@ -422,11 +374,7 @@ impl Election for PgElection {
impl PgElection {
/// Returns the [Lease] of the given key, if any, including its value, expire time, current time and origin string.
async fn get_value_with_lease(
&self,
key: &str,
with_origin: bool,
) -> Result<Option<(String, Timestamp, Timestamp, Option<String>)>> {
async fn get_value_with_lease(&self, key: &str) -> Result<Option<Lease>> {
let key = key.as_bytes();
let res = self
.client
@@ -451,16 +399,12 @@ impl PgElection {
String::from_utf8_lossy(res[0].try_get(0).unwrap_or_default());
let (value, expire_time) = parse_value_and_expire_time(&value_and_expire_time)?;
if with_origin {
Ok(Some((
value,
expire_time,
current_time,
Some(value_and_expire_time.to_string()),
)))
} else {
Ok(Some((value, expire_time, current_time, None)))
}
Ok(Some(Lease {
leader_value: value,
expire_time,
current: current_time,
origin: value_and_expire_time.to_string(),
}))
}
}
@@ -579,16 +523,18 @@ impl PgElection {
let key = self.election_key();
// Case 1
if self.is_leader() {
match self.get_value_with_lease(&key, true).await? {
Some((prev_leader, expire_time, current, prev)) => {
match (prev_leader == self.leader_value, expire_time > current) {
match self.get_value_with_lease(&key).await? {
Some(lease) => {
match (
lease.leader_value == self.leader_value,
lease.expire_time > lease.current,
) {
// Case 1.1
(true, true) => {
// Safety: prev is Some since we are using `get_value_with_lease` with `true`.
let prev = prev.unwrap();
self.update_value_with_lease(
&key,
&prev,
&lease.origin,
&self.leader_value,
self.meta_lease_ttl_secs,
)
@@ -635,12 +581,12 @@ impl PgElection {
if self.is_leader() {
self.step_down_without_lock().await?;
}
let (_, expire_time, current, _) = self
.get_value_with_lease(&key, false)
let lease = self
.get_value_with_lease(&key)
.await?
.context(NoLeaderSnafu)?;
// Case 2
ensure!(expire_time > current, NoLeaderSnafu);
ensure!(lease.expire_time > lease.current, NoLeaderSnafu);
// Case 3
Ok(())
}
@@ -653,35 +599,29 @@ impl PgElection {
/// Should only step down while holding the advisory lock.
async fn step_down(&self) -> Result<()> {
let key = self.election_key();
let leader_key = PgLeaderKey {
let leader_key = RdsLeaderKey {
name: self.leader_value.clone().into_bytes(),
key: key.clone().into_bytes(),
..Default::default()
};
if self
.is_leader
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_ok()
{
self.delete_value(&key).await?;
self.client
.query(&self.sql_set.step_down, &[])
.await
.context(PostgresExecutionSnafu)?;
if let Err(e) = self
.leader_watcher
.send(LeaderChangeMessage::StepDown(Arc::new(leader_key)))
{
error!(e; "Failed to send leader change message");
}
}
self.delete_value(&key).await?;
self.client
.query(&self.sql_set.step_down, &[])
.await
.context(PostgresExecutionSnafu)?;
send_leader_change_and_set_flags(
&self.is_leader,
&self.leader_infancy,
&self.leader_watcher,
LeaderChangeMessage::StepDown(Arc::new(leader_key)),
);
Ok(())
}
/// Still consider itself as the leader locally but failed to acquire the lock. Step down without deleting the key.
async fn step_down_without_lock(&self) -> Result<()> {
let key = self.election_key().into_bytes();
let leader_key = PgLeaderKey {
let leader_key = RdsLeaderKey {
name: self.leader_value.clone().into_bytes(),
key: key.clone(),
..Default::default()
@@ -705,7 +645,7 @@ impl PgElection {
/// Caution: Should only elected while holding the advisory lock.
async fn elected(&self) -> Result<()> {
let key = self.election_key();
let leader_key = PgLeaderKey {
let leader_key = RdsLeaderKey {
name: self.leader_value.clone().into_bytes(),
key: key.clone().into_bytes(),
..Default::default()
@@ -800,23 +740,22 @@ mod tests {
.unwrap();
assert!(res);
let (value_get, _, _, prev) = pg_election
.get_value_with_lease(&key, true)
let lease = pg_election
.get_value_with_lease(&key)
.await
.unwrap()
.unwrap();
assert_eq!(value_get, value);
assert_eq!(lease.leader_value, value);
let prev = prev.unwrap();
pg_election
.update_value_with_lease(&key, &prev, &value, pg_election.meta_lease_ttl_secs)
.update_value_with_lease(&key, &lease.origin, &value, pg_election.meta_lease_ttl_secs)
.await
.unwrap();
let res = pg_election.delete_value(&key).await.unwrap();
assert!(res);
let res = pg_election.get_value_with_lease(&key, false).await.unwrap();
let res = pg_election.get_value_with_lease(&key).await.unwrap();
assert!(res.is_none());
for i in 0..10 {
@@ -963,13 +902,13 @@ mod tests {
};
leader_pg_election.elected().await.unwrap();
let (leader, expire_time, current, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(expire_time > current);
assert!(lease.leader_value == leader_value);
assert!(lease.expire_time > lease.current);
assert!(leader_pg_election.is_leader());
match rx.recv().await {
@@ -986,12 +925,12 @@ mod tests {
}
leader_pg_election.step_down_without_lock().await.unwrap();
let (leader, _, _, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(lease.leader_value == leader_value);
assert!(!leader_pg_election.is_leader());
match rx.recv().await {
@@ -1008,13 +947,13 @@ mod tests {
}
leader_pg_election.elected().await.unwrap();
let (leader, expire_time, current, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(expire_time > current);
assert!(lease.leader_value == leader_value);
assert!(lease.expire_time > lease.current);
assert!(leader_pg_election.is_leader());
match rx.recv().await {
@@ -1032,7 +971,7 @@ mod tests {
leader_pg_election.step_down().await.unwrap();
let res = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap();
assert!(res.is_none());
@@ -1085,13 +1024,13 @@ mod tests {
let res: bool = res[0].get(0);
assert!(res);
leader_pg_election.leader_action().await.unwrap();
let (leader, expire_time, current, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(expire_time > current);
assert!(lease.leader_value == leader_value);
assert!(lease.expire_time > lease.current);
assert!(leader_pg_election.is_leader());
match rx.recv().await {
@@ -1116,13 +1055,15 @@ mod tests {
let res: bool = res[0].get(0);
assert!(res);
leader_pg_election.leader_action().await.unwrap();
let (leader, new_expire_time, current, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let new_lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(new_expire_time > current && new_expire_time > expire_time);
assert!(new_lease.leader_value == leader_value);
assert!(
new_lease.expire_time > new_lease.current && new_lease.expire_time > lease.expire_time
);
assert!(leader_pg_election.is_leader());
// Step 3: Something wrong, the leader lease expired.
@@ -1137,7 +1078,7 @@ mod tests {
assert!(res);
leader_pg_election.leader_action().await.unwrap();
let res = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap();
assert!(res.is_none());
@@ -1164,13 +1105,13 @@ mod tests {
let res: bool = res[0].get(0);
assert!(res);
leader_pg_election.leader_action().await.unwrap();
let (leader, expire_time, current, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(expire_time > current);
assert!(lease.leader_value == leader_value);
assert!(lease.expire_time > lease.current);
assert!(leader_pg_election.is_leader());
match rx.recv().await {
@@ -1193,7 +1134,7 @@ mod tests {
.unwrap();
leader_pg_election.leader_action().await.unwrap();
let res = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap();
assert!(res.is_none());
@@ -1221,13 +1162,13 @@ mod tests {
let res: bool = res[0].get(0);
assert!(res);
leader_pg_election.leader_action().await.unwrap();
let (leader, expire_time, current, _) = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
let lease = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap()
.unwrap();
assert!(leader == leader_value);
assert!(expire_time > current);
assert!(lease.leader_value == leader_value);
assert!(lease.expire_time > lease.current);
assert!(leader_pg_election.is_leader());
match rx.recv().await {
@@ -1261,7 +1202,7 @@ mod tests {
.unwrap();
leader_pg_election.leader_action().await.unwrap();
let res = leader_pg_election
.get_value_with_lease(&leader_pg_election.election_key(), false)
.get_value_with_lease(&leader_pg_election.election_key())
.await
.unwrap();
assert!(res.is_none());

View File

@@ -280,7 +280,7 @@ impl MetasrvBuilder {
ensure!(
options.allow_region_failover_on_local_wal,
error::UnexpectedSnafu {
violated: "Region failover is not supported in the local WAL implementation!
violated: "Region failover is not supported in the local WAL implementation!
If you want to enable region failover for local WAL, please set `allow_region_failover_on_local_wal` to true.",
}
);
@@ -351,6 +351,11 @@ impl MetasrvBuilder {
};
let leader_region_registry = Arc::new(LeaderRegionRegistry::default());
#[cfg(feature = "enterprise")]
let trigger_ddl_manager = plugins
.as_ref()
.and_then(|plugins| plugins.get::<common_meta::ddl_manager::TriggerDdlManagerRef>());
let ddl_manager = Arc::new(
DdlManager::try_new(
DdlContext {
@@ -366,6 +371,8 @@ impl MetasrvBuilder {
},
procedure_manager.clone(),
true,
#[cfg(feature = "enterprise")]
trigger_ddl_manager,
)
.context(error::InitDdlManagerSnafu)?,
);

View File

@@ -343,13 +343,14 @@ mod test {
#[tokio::test]
async fn test_wal_prune_ticker() {
let (tx, mut rx) = WalPruneManager::channel();
let interval = Duration::from_millis(10);
let interval = Duration::from_millis(50);
let ticker = WalPruneTicker::new(interval, tx);
assert_eq!(ticker.name(), "WalPruneTicker");
for _ in 0..2 {
ticker.start();
sleep(2 * interval).await;
// wait a bit longer to make sure not all ticks are skipped
sleep(4 * interval).await;
assert!(!rx.is_empty());
while let Ok(event) = rx.try_recv() {
assert_matches!(event, Event::Tick);

View File

@@ -657,13 +657,6 @@ pub enum Error {
unexpected_entry_id: u64,
},
#[snafu(display("Read the corrupted log entry, region_id: {}", region_id))]
CorruptedEntry {
region_id: RegionId,
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Failed to download file, region_id: {}, file_id: {}, file_type: {:?}",
region_id,
@@ -1106,7 +1099,6 @@ impl ErrorExt for Error {
| EncodeMemtable { .. }
| CreateDir { .. }
| ReadDataPart { .. }
| CorruptedEntry { .. }
| BuildEntry { .. }
| Metadata { .. }
| MitoManifestInfo { .. } => StatusCode::Internal,

View File

@@ -65,7 +65,7 @@ impl SimpleBulkMemtable {
} else {
dedup
};
let series = RwLock::new(Series::new(&region_metadata));
let series = RwLock::new(Series::with_capacity(&region_metadata, 1024));
Self {
id,

View File

@@ -60,7 +60,7 @@ use crate::region::options::MergeMode;
use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
/// Initial vector builder capacity.
const INITIAL_BUILDER_CAPACITY: usize = 1024 * 8;
const INITIAL_BUILDER_CAPACITY: usize = 4;
/// Vector builder capacity.
const BUILDER_CAPACITY: usize = 512;
@@ -663,15 +663,19 @@ pub(crate) struct Series {
}
impl Series {
pub(crate) fn new(region_metadata: &RegionMetadataRef) -> Self {
pub(crate) fn with_capacity(region_metadata: &RegionMetadataRef, builder_cap: usize) -> Self {
Self {
pk_cache: None,
active: ValueBuilder::new(region_metadata, INITIAL_BUILDER_CAPACITY),
active: ValueBuilder::new(region_metadata, builder_cap),
frozen: vec![],
region_metadata: region_metadata.clone(),
}
}
pub(crate) fn new(region_metadata: &RegionMetadataRef) -> Self {
Self::with_capacity(region_metadata, INITIAL_BUILDER_CAPACITY)
}
pub fn is_empty(&self) -> bool {
self.active.len() == 0 && self.frozen.is_empty()
}

View File

@@ -15,7 +15,7 @@
use std::collections::{BTreeMap, BTreeSet};
use common_telemetry::warn;
use datafusion_common::ScalarValue;
use datafusion_common::{Column, ScalarValue};
use datafusion_expr::expr::InList;
use datafusion_expr::{BinaryExpr, Expr, Operator};
use datatypes::data_type::ConcreteDataType;
@@ -121,6 +121,7 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
Ok(())
}
Operator::Eq => self.collect_eq(left, right),
Operator::Or => self.collect_or_eq_list(left, right),
_ => Ok(()),
},
Expr::InList(in_list) => self.collect_in_list(in_list),
@@ -152,10 +153,8 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
/// Collects an equality expression (column = value)
fn collect_eq(&mut self, left: &Expr, right: &Expr) -> Result<()> {
let (col, lit) = match (left, right) {
(Expr::Column(col), Expr::Literal(lit)) => (col, lit),
(Expr::Literal(lit), Expr::Column(col)) => (col, lit),
_ => return Ok(()),
let Some((col, lit)) = Self::eq_expr_col_lit(left, right)? else {
return Ok(());
};
if lit.is_null() {
return Ok(());
@@ -218,6 +217,83 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
Ok(())
}
/// Collects an or expression in the form of `column = lit OR column = lit OR ...`.
fn collect_or_eq_list(&mut self, left: &Expr, right: &Expr) -> Result<()> {
let (eq_left, eq_right, or_list) = if let Expr::BinaryExpr(BinaryExpr {
left: l,
op: Operator::Eq,
right: r,
}) = left
{
(l, r, right)
} else if let Expr::BinaryExpr(BinaryExpr {
left: l,
op: Operator::Eq,
right: r,
}) = right
{
(l, r, left)
} else {
return Ok(());
};
let Some((col, lit)) = Self::eq_expr_col_lit(eq_left, eq_right)? else {
return Ok(());
};
if lit.is_null() {
return Ok(());
}
let Some((column_id, data_type)) = self.column_id_and_type(&col.name)? else {
return Ok(());
};
let mut inlist = BTreeSet::new();
inlist.insert(encode_lit(lit, data_type.clone())?);
if Self::collect_or_eq_list_rec(&col.name, &data_type, or_list, &mut inlist)? {
self.predicates
.entry(column_id)
.or_default()
.push(InListPredicate { list: inlist });
}
Ok(())
}
fn collect_or_eq_list_rec(
column_name: &str,
data_type: &ConcreteDataType,
expr: &Expr,
inlist: &mut BTreeSet<Bytes>,
) -> Result<bool> {
if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr {
match op {
Operator::Or => {
let r = Self::collect_or_eq_list_rec(column_name, data_type, left, inlist)?
.then(|| {
Self::collect_or_eq_list_rec(column_name, data_type, right, inlist)
})
.transpose()?
.unwrap_or(false);
return Ok(r);
}
Operator::Eq => {
let Some((col, lit)) = Self::eq_expr_col_lit(left, right)? else {
return Ok(false);
};
if lit.is_null() || column_name != col.name {
return Ok(false);
}
let bytes = encode_lit(lit, data_type.clone())?;
inlist.insert(bytes);
return Ok(true);
}
_ => {}
}
}
Ok(false)
}
/// Helper function to get non-null literal value
fn nonnull_lit(expr: &Expr) -> Option<&ScalarValue> {
match expr {
@@ -225,6 +301,19 @@ impl<'a> BloomFilterIndexApplierBuilder<'a> {
_ => None,
}
}
/// Helper function to get the column and literal value from an equality expr (column = lit)
fn eq_expr_col_lit<'b>(
left: &'b Expr,
right: &'b Expr,
) -> Result<Option<(&'b Column, &'b ScalarValue)>> {
let (col, lit) = match (left, right) {
(Expr::Column(col), Expr::Literal(lit)) => (col, lit),
(Expr::Literal(lit), Expr::Column(col)) => (col, lit),
_ => return Ok(None),
};
Ok(Some((col, lit)))
}
}
// TODO(ruihang): extract this and the one under inverted_index into a common util mod.
@@ -241,6 +330,7 @@ fn encode_lit(lit: &ScalarValue, data_type: ConcreteDataType) -> Result<Bytes> {
mod tests {
use api::v1::SemanticType;
use datafusion_common::Column;
use datafusion_expr::{col, lit};
use datatypes::schema::ColumnSchema;
use object_store::services::Memory;
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
@@ -356,6 +446,66 @@ mod tests {
assert_eq!(column_predicates[0].list.len(), 3);
}
#[test]
fn test_build_with_or_chain() {
let (_d, factory) = PuffinManagerFactory::new_for_test_block("test_build_with_or_chain_");
let metadata = test_region_metadata();
let builder = || {
BloomFilterIndexApplierBuilder::new(
"test".to_string(),
test_object_store(),
&metadata,
factory.clone(),
)
};
let expr = col("column1")
.eq(lit("value1"))
.or(col("column1")
.eq(lit("value2"))
.or(col("column1").eq(lit("value4"))))
.or(col("column1").eq(lit("value3")));
let result = builder().build(&[expr]).unwrap();
assert!(result.is_some());
let predicates = result.unwrap().predicates;
let column_predicates = predicates.get(&1).unwrap();
assert_eq!(column_predicates.len(), 1);
assert_eq!(column_predicates[0].list.len(), 4);
let or_chain_predicates = &column_predicates[0].list;
let encode_str = |s: &str| {
encode_lit(
&ScalarValue::Utf8(Some(s.to_string())),
ConcreteDataType::string_datatype(),
)
.unwrap()
};
assert!(or_chain_predicates.contains(&encode_str("value1")));
assert!(or_chain_predicates.contains(&encode_str("value2")));
assert!(or_chain_predicates.contains(&encode_str("value3")));
assert!(or_chain_predicates.contains(&encode_str("value4")));
// Test with null value
let expr = col("column1").eq(Expr::Literal(ScalarValue::Utf8(None)));
let result = builder().build(&[expr]).unwrap();
assert!(result.is_none());
// Test with different column
let expr = col("column1")
.eq(lit("value1"))
.or(col("column2").eq(lit("value2")));
let result = builder().build(&[expr]).unwrap();
assert!(result.is_none());
// Test with non or chain
let expr = col("column1")
.eq(lit("value1"))
.or(col("column1").gt_eq(lit("value2")));
let result = builder().build(&[expr]).unwrap();
assert!(result.is_none());
}
#[test]
fn test_build_with_and_expressions() {
let (_d, factory) = PuffinManagerFactory::new_for_test_block("test_build_with_and_");

View File
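Flattening a column = lit OR column = lit OR ... chain into one IN-list predicate is a small recursion over the expression tree; the sketch below uses a toy Expr enum (not DataFusion's) to show the same collect-or-abort behaviour the applier implements.

use std::collections::BTreeSet;

// Toy expression tree: just enough to show the OR-of-equalities flattening.
enum Expr {
    Eq { column: String, value: String },
    Or(Box<Expr>, Box<Expr>),
    Other,
}

/// Returns true if the whole subtree is `column = <lit>` chained with ORs,
/// collecting the literals; any other node aborts the predicate.
fn collect_or_eq(column: &str, expr: &Expr, values: &mut BTreeSet<String>) -> bool {
    match expr {
        Expr::Eq { column: c, value } if c.as_str() == column => {
            values.insert(value.clone());
            true
        }
        Expr::Or(left, right) => {
            collect_or_eq(column, left, values) && collect_or_eq(column, right, values)
        }
        _ => false,
    }
}

fn main() {
    let eq = |v: &str| Expr::Eq { column: "column1".into(), value: v.into() };
    let expr = Expr::Or(
        Box::new(Expr::Or(Box::new(eq("value1")), Box::new(eq("value2")))),
        Box::new(eq("value3")),
    );
    let mut values = BTreeSet::new();
    assert!(collect_or_eq("column1", &expr, &mut values));
    assert_eq!(values.len(), 3);

    // Mixing in a non-equality (or a different column) drops the whole predicate.
    let broken = Expr::Or(Box::new(eq("value1")), Box::new(Expr::Other));
    assert!(!collect_or_eq("column1", &broken, &mut values));
}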

@@ -16,7 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use async_stream::stream;
use common_telemetry::{debug, error};
use common_telemetry::{debug, error, warn};
use futures::future::join_all;
use snafu::OptionExt;
use store_api::logstore::entry::Entry;
@@ -133,11 +133,15 @@ impl WalEntryReader for WalEntryReceiver {
}
let stream = stream! {
let mut buffered_entry = None;
let mut buffered_entry: Option<Entry> = None;
while let Some(next_entry) = entry_receiver.recv().await {
match buffered_entry.take() {
Some(entry) => {
yield decode_raw_entry(entry);
if entry.is_complete() {
yield decode_raw_entry(entry);
} else {
warn!("Ignoring incomplete entry: {}", entry);
}
buffered_entry = Some(next_entry);
},
None => {
@@ -149,6 +153,8 @@ impl WalEntryReader for WalEntryReceiver {
// Ignores tail corrupted data.
if entry.is_complete() {
yield decode_raw_entry(entry);
} else {
warn!("Ignoring incomplete entry: {}", entry);
}
}
};
@@ -213,7 +219,6 @@ pub fn build_wal_entry_distributor_and_receivers(
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use api::v1::{Mutation, OpType, WalEntry};
use futures::{stream, TryStreamExt};
@@ -385,6 +390,7 @@ mod tests {
#[tokio::test]
async fn test_tail_corrupted_stream() {
common_telemetry::init_default_ut_logging();
let mut entries = vec![];
let region1 = RegionId::new(1, 1);
let region1_expected_wal_entry = WalEntry {
@@ -484,6 +490,7 @@ mod tests {
#[tokio::test]
async fn test_part_corrupted_stream() {
common_telemetry::init_default_ut_logging();
let mut entries = vec![];
let region1 = RegionId::new(1, 1);
let region1_expected_wal_entry = WalEntry {
@@ -504,7 +511,7 @@ mod tests {
3,
));
entries.extend(vec![
// The corrupted data.
// The incomplete entry.
Entry::MultiplePart(MultiplePartEntry {
provider: provider.clone(),
region_id: region2,
@@ -512,6 +519,7 @@ mod tests {
headers: vec![MultiplePartHeader::First],
parts: vec![vec![1; 100]],
}),
// The incomplete entry.
Entry::MultiplePart(MultiplePartEntry {
provider: provider.clone(),
region_id: region2,
@@ -545,14 +553,14 @@ mod tests {
vec![(0, region1_expected_wal_entry)]
);
assert_matches!(
assert_eq!(
streams
.get_mut(1)
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap_err(),
error::Error::CorruptedEntry { .. }
.unwrap(),
vec![]
);
}

View File

@@ -14,21 +14,25 @@
use api::v1::WalEntry;
use async_stream::stream;
use common_telemetry::tracing::warn;
use futures::StreamExt;
use object_store::Buffer;
use prost::Message;
use snafu::{ensure, ResultExt};
use snafu::ResultExt;
use store_api::logstore::entry::Entry;
use store_api::logstore::provider::Provider;
use crate::error::{CorruptedEntrySnafu, DecodeWalSnafu, Result};
use crate::error::{DecodeWalSnafu, Result};
use crate::wal::raw_entry_reader::RawEntryReader;
use crate::wal::{EntryId, WalEntryStream};
/// Decodes the [Entry] into [WalEntry].
///
/// The caller must ensure the [Entry] is complete.
pub(crate) fn decode_raw_entry(raw_entry: Entry) -> Result<(EntryId, WalEntry)> {
let entry_id = raw_entry.entry_id();
let region_id = raw_entry.region_id();
ensure!(raw_entry.is_complete(), CorruptedEntrySnafu { region_id });
debug_assert!(raw_entry.is_complete());
let buffer = into_buffer(raw_entry);
let wal_entry = WalEntry::decode(buffer).context(DecodeWalSnafu { region_id })?;
Ok((entry_id, wal_entry))
@@ -58,7 +62,7 @@ impl WalEntryReader for NoopEntryReader {
}
}
/// A Reader reads the [RawEntry] from [RawEntryReader] and decodes [RawEntry] into [WalEntry].
/// A Reader reads the [Entry] from [RawEntryReader] and decodes [Entry] into [WalEntry].
pub struct LogStoreEntryReader<R> {
reader: R,
}
@@ -75,11 +79,15 @@ impl<R: RawEntryReader> WalEntryReader for LogStoreEntryReader<R> {
let mut stream = reader.read(ns, start_id)?;
let stream = stream! {
let mut buffered_entry = None;
let mut buffered_entry: Option<Entry> = None;
while let Some(next_entry) = stream.next().await {
match buffered_entry.take() {
Some(entry) => {
yield decode_raw_entry(entry);
if entry.is_complete() {
yield decode_raw_entry(entry);
} else {
warn!("Ignoring incomplete entry: {}", entry);
}
buffered_entry = Some(next_entry?);
},
None => {
@@ -91,6 +99,8 @@ impl<R: RawEntryReader> WalEntryReader for LogStoreEntryReader<R> {
// Ignores tail corrupted data.
if entry.is_complete() {
yield decode_raw_entry(entry);
} else {
warn!("Ignoring incomplete entry: {}", entry);
}
}
};
@@ -101,7 +111,6 @@ impl<R: RawEntryReader> WalEntryReader for LogStoreEntryReader<R> {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use api::v1::{Mutation, OpType, WalEntry};
use futures::TryStreamExt;
@@ -110,7 +119,6 @@ mod tests {
use store_api::logstore::provider::Provider;
use store_api::storage::RegionId;
use crate::error;
use crate::test_util::wal_util::MockRawEntryStream;
use crate::wal::entry_reader::{LogStoreEntryReader, WalEntryReader};
@@ -141,7 +149,7 @@ mod tests {
headers: vec![MultiplePartHeader::First, MultiplePartHeader::Last],
parts,
}),
// The tail corrupted data.
// The tail incomplete entry.
Entry::MultiplePart(MultiplePartEntry {
provider: provider.clone(),
region_id: RegionId::new(1, 1),
@@ -171,6 +179,7 @@ mod tests {
let provider = Provider::kafka_provider("my_topic".to_string());
let raw_entry_stream = MockRawEntryStream {
entries: vec![
// The incomplete entry.
Entry::MultiplePart(MultiplePartEntry {
provider: provider.clone(),
region_id: RegionId::new(1, 1),
@@ -189,12 +198,12 @@ mod tests {
};
let mut reader = LogStoreEntryReader::new(raw_entry_stream);
let err = reader
let entries = reader
.read(&provider, 0)
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap_err();
assert_matches!(err, error::Error::CorruptedEntry { .. });
.unwrap();
assert!(entries.is_empty());
}
}
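
Both reader changes above follow the same buffered-lookahead pattern: hold one entry back, and when the next entry arrives decide whether the held one is complete; incomplete entries (including a corrupted tail) are now logged and skipped instead of failing the whole stream with a CorruptedEntry error. A minimal, self-contained sketch of that pattern, using a toy entry type in place of store_api's Entry:

// Sketch only: `MiniEntry` stands in for the real Entry; `complete` mirrors
// `Entry::is_complete()` and the ids mirror what decode_raw_entry would yield.
struct MiniEntry {
    id: u64,
    complete: bool,
}

fn read_complete(entries: Vec<MiniEntry>) -> Vec<u64> {
    let mut out = Vec::new();
    let mut buffered: Option<MiniEntry> = None;
    for next in entries {
        if let Some(entry) = buffered.take() {
            if entry.complete {
                out.push(entry.id); // the real reader yields decode_raw_entry(entry) here
            } else {
                eprintln!("Ignoring incomplete entry: {}", entry.id);
            }
        }
        buffered = Some(next);
    }
    // Tail entry: incomplete tail data is dropped the same way instead of erroring.
    match buffered {
        Some(entry) if entry.complete => out.push(entry.id),
        Some(entry) => eprintln!("Ignoring incomplete entry: {}", entry.id),
        None => {}
    }
    out
}

fn main() {
    let ids = read_complete(vec![
        MiniEntry { id: 0, complete: true },
        MiniEntry { id: 1, complete: false }, // skipped, stream keeps going
        MiniEntry { id: 2, complete: true },
    ]);
    assert_eq!(ids, vec![0, 2]);
}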

View File

@@ -6,7 +6,7 @@ license.workspace = true
[features]
testing = []
enterprise = ["sql/enterprise"]
enterprise = ["common-meta/enterprise", "sql/enterprise"]
[lints]
workspace = true

View File

@@ -703,6 +703,14 @@ pub enum Error {
location: Location,
},
#[cfg(feature = "enterprise")]
#[snafu(display("Invalid trigger name: {name}"))]
InvalidTriggerName {
name: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Empty {} expr", name))]
EmptyDdlExpr {
name: String,
@@ -872,6 +880,8 @@ impl ErrorExt for Error {
| Error::CursorNotFound { .. }
| Error::CursorExists { .. }
| Error::CreatePartitionRules { .. } => StatusCode::InvalidArguments,
#[cfg(feature = "enterprise")]
Error::InvalidTriggerName { .. } => StatusCode::InvalidArguments,
Error::TableAlreadyExists { .. } | Error::ViewAlreadyExists { .. } => {
StatusCode::TableAlreadyExists
}

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(feature = "enterprise")]
pub mod trigger;
use std::collections::{HashMap, HashSet};
use api::helper::ColumnDataTypeWrapper;
@@ -55,6 +58,8 @@ use sql::statements::{
use sql::util::extract_tables_from_query;
use table::requests::{TableOptions, FILE_TABLE_META_KEY};
use table::table_reference::TableReference;
#[cfg(feature = "enterprise")]
pub use trigger::to_create_trigger_task_expr;
use crate::error::{
BuildCreateExprOnInsertionSnafu, ColumnDataTypeSnafu, ConvertColumnDefaultConstraintSnafu,

View File

@@ -0,0 +1,146 @@
use api::v1::notify_channel::ChannelType as PbChannelType;
use api::v1::{
CreateTriggerExpr as PbCreateTriggerExpr, NotifyChannel as PbNotifyChannel,
WebhookOptions as PbWebhookOptions,
};
use session::context::QueryContextRef;
use snafu::ensure;
use sql::ast::ObjectName;
use sql::statements::create::trigger::{ChannelType, CreateTrigger};
use crate::error::Result;
pub fn to_create_trigger_task_expr(
create_trigger: CreateTrigger,
query_ctx: &QueryContextRef,
) -> Result<PbCreateTriggerExpr> {
let CreateTrigger {
trigger_name,
if_not_exists,
query,
interval,
labels,
annotations,
channels,
} = create_trigger;
let catalog_name = query_ctx.current_catalog().to_string();
let trigger_name = sanitize_trigger_name(trigger_name)?;
let channels = channels
.into_iter()
.map(|c| {
let name = c.name.value;
match c.channel_type {
ChannelType::Webhook(am) => PbNotifyChannel {
name,
channel_type: Some(PbChannelType::Webhook(PbWebhookOptions {
url: am.url.value,
opts: am.options.into_map(),
})),
},
}
})
.collect::<Vec<_>>();
let sql = query.to_string();
let labels = labels.into_map();
let annotations = annotations.into_map();
Ok(PbCreateTriggerExpr {
catalog_name,
trigger_name,
create_if_not_exists: if_not_exists,
sql,
channels,
labels,
annotations,
interval,
})
}
fn sanitize_trigger_name(mut trigger_name: ObjectName) -> Result<String> {
ensure!(
trigger_name.0.len() == 1,
crate::error::InvalidTriggerNameSnafu {
name: trigger_name.to_string(),
}
);
// safety: we've checked trigger_name.0 has exactly one element.
Ok(trigger_name.0.swap_remove(0).value)
}
#[cfg(test)]
mod tests {
use session::context::QueryContext;
use sql::dialect::GreptimeDbDialect;
use sql::parser::{ParseOptions, ParserContext};
use sql::statements::statement::Statement;
use super::*;
#[test]
fn test_sanitize_trigger_name() {
let name = ObjectName(vec![sql::ast::Ident::new("my_trigger")]);
let sanitized = sanitize_trigger_name(name).unwrap();
assert_eq!(sanitized, "my_trigger");
let name = ObjectName(vec![sql::ast::Ident::with_quote('`', "my_trigger")]);
let sanitized = sanitize_trigger_name(name).unwrap();
assert_eq!(sanitized, "my_trigger");
let name = ObjectName(vec![sql::ast::Ident::with_quote('\'', "trigger")]);
let sanitized = sanitize_trigger_name(name).unwrap();
assert_eq!(sanitized, "trigger");
}
#[test]
fn test_to_create_trigger_task_expr() {
let sql = r#"CREATE TRIGGER IF NOT EXISTS cpu_monitor
ON (SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 2) EVERY '5 minute'::INTERVAL
LABELS (label_name=label_val)
ANNOTATIONS (annotation_name=annotation_val)
NOTIFY
(WEBHOOK alert_manager URL 'http://127.0.0.1:9093' WITH (timeout='1m'))"#;
let stmt =
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
.unwrap()
.pop()
.unwrap();
let Statement::CreateTrigger(stmt) = stmt else {
unreachable!()
};
let query_ctx = QueryContext::arc();
let expr = to_create_trigger_task_expr(stmt, &query_ctx).unwrap();
assert_eq!("greptime", expr.catalog_name);
assert_eq!("cpu_monitor", expr.trigger_name);
assert!(expr.create_if_not_exists);
assert_eq!(
"(SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 2)",
expr.sql
);
assert_eq!(300, expr.interval);
assert_eq!(1, expr.labels.len());
assert_eq!("label_val", expr.labels.get("label_name").unwrap());
assert_eq!(1, expr.annotations.len());
assert_eq!(
"annotation_val",
expr.annotations.get("annotation_name").unwrap()
);
assert_eq!(1, expr.channels.len());
let c = &expr.channels[0];
assert_eq!("alert_manager", c.name,);
let channel_type = c.channel_type.as_ref().unwrap();
let PbChannelType::Webhook(am) = &channel_type;
assert_eq!("http://127.0.0.1:9093", am.url);
assert_eq!(1, am.opts.len());
assert_eq!(
"1m",
am.opts.get("timeout").expect("Expected timeout option")
);
}
}

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::flow::FlowRequestHeader;
use api::v1::flow::{AdjustFlow, FlowRequestHeader};
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_function::handlers::FlowServiceHandler;
@@ -22,6 +22,7 @@ use common_query::error::Result;
use common_telemetry::tracing_context::TracingContext;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use serde_json::json;
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
@@ -57,9 +58,96 @@ impl FlowServiceHandler for FlowServiceOperator {
) -> Result<api::v1::flow::FlowResponse> {
self.flush_inner(catalog, flow, ctx).await
}
async fn adjust(
&self,
catalog: &str,
flow: &str,
min_run_interval_secs: u64,
max_filter_num_per_query: usize,
ctx: QueryContextRef,
) -> Result<api::v1::flow::FlowResponse> {
self.adjust_inner(
catalog,
flow,
min_run_interval_secs,
max_filter_num_per_query,
ctx,
)
.await
}
}
impl FlowServiceOperator {
async fn adjust_inner(
&self,
catalog: &str,
flow: &str,
min_run_interval_secs: u64,
max_filter_num_per_query: usize,
ctx: QueryContextRef,
) -> Result<api::v1::flow::FlowResponse> {
let id = self
.flow_metadata_manager
.flow_name_manager()
.get(catalog, flow)
.await
.map_err(BoxedError::new)
.context(common_query::error::ExecuteSnafu)?
.context(common_meta::error::FlowNotFoundSnafu {
flow_name: format!("{}.{}", catalog, flow),
})
.map_err(BoxedError::new)
.context(common_query::error::ExecuteSnafu)?
.flow_id();
let all_flownode_peers = self
.flow_metadata_manager
.flow_route_manager()
.routes(id)
.await
.map_err(BoxedError::new)
.context(common_query::error::ExecuteSnafu)?;
// order of flownodes doesn't matter here
let all_flow_nodes = FuturesUnordered::from_iter(
all_flownode_peers
.iter()
.map(|(_key, peer)| self.node_manager.flownode(peer.peer())),
)
.collect::<Vec<_>>()
.await;
// TODO(discord9): use proper type for flow options
let options = json!({
"min_run_interval_secs": min_run_interval_secs,
"max_filter_num_per_query": max_filter_num_per_query,
});
for node in all_flow_nodes {
let _res = {
use api::v1::flow::{flow_request, FlowRequest};
let flush_req = FlowRequest {
header: Some(FlowRequestHeader {
tracing_context: TracingContext::from_current_span().to_w3c(),
query_context: Some(
common_meta::rpc::ddl::QueryContext::from(ctx.clone()).into(),
),
}),
body: Some(flow_request::Body::Adjust(AdjustFlow {
flow_id: Some(api::v1::FlowId { id }),
options: options.to_string(),
})),
};
node.handle(flush_req)
.await
.map_err(BoxedError::new)
.context(common_query::error::ExecuteSnafu)?
};
}
Ok(Default::default())
}
/// Flush the flownodes according to the flow id.
async fn flush_inner(
&self,

View File

@@ -147,7 +147,7 @@ impl Inserter {
statement_executor: &StatementExecutor,
) -> Result<Output> {
let row_inserts = ColumnToRow::convert(requests)?;
self.handle_row_inserts(row_inserts, ctx, statement_executor, false)
self.handle_row_inserts(row_inserts, ctx, statement_executor, false, false)
.await
}
@@ -158,6 +158,7 @@ impl Inserter {
ctx: QueryContextRef,
statement_executor: &StatementExecutor,
accommodate_existing_schema: bool,
is_single_value: bool,
) -> Result<Output> {
preprocess_row_insert_requests(&mut requests.inserts)?;
self.handle_row_inserts_with_create_type(
@@ -166,6 +167,7 @@ impl Inserter {
statement_executor,
AutoCreateTableType::Physical,
accommodate_existing_schema,
is_single_value,
)
.await
}
@@ -183,6 +185,7 @@ impl Inserter {
statement_executor,
AutoCreateTableType::Log,
false,
false,
)
.await
}
@@ -199,6 +202,7 @@ impl Inserter {
statement_executor,
AutoCreateTableType::Trace,
false,
false,
)
.await
}
@@ -210,6 +214,7 @@ impl Inserter {
ctx: QueryContextRef,
statement_executor: &StatementExecutor,
accommodate_existing_schema: bool,
is_single_value: bool,
) -> Result<Output> {
self.handle_row_inserts_with_create_type(
requests,
@@ -217,6 +222,7 @@ impl Inserter {
statement_executor,
AutoCreateTableType::LastNonNull,
accommodate_existing_schema,
is_single_value,
)
.await
}
@@ -229,6 +235,7 @@ impl Inserter {
statement_executor: &StatementExecutor,
create_type: AutoCreateTableType,
accommodate_existing_schema: bool,
is_single_value: bool,
) -> Result<Output> {
// remove empty requests
requests.inserts.retain(|req| {
@@ -249,6 +256,7 @@ impl Inserter {
create_type,
statement_executor,
accommodate_existing_schema,
is_single_value,
)
.await?;
@@ -299,6 +307,7 @@ impl Inserter {
AutoCreateTableType::Logical(physical_table.to_string()),
statement_executor,
true,
true,
)
.await?;
let name_to_info = table_infos
@@ -464,9 +473,10 @@ impl Inserter {
/// This mapping is used in the conversion of RowToRegion.
///
/// `accommodate_existing_schema` is used to determine if the existing schema should override the new schema.
/// It only works for TIME_INDEX and VALUE columns. This is for the case where the user creates a table with
/// It only works for TIME_INDEX and single VALUE columns. This is for the case where the user creates a table with
/// custom schema, and then inserts data with endpoints that have default schema setting, like prometheus
/// remote write. This will modify the `RowInsertRequests` in place.
/// `is_single_value` indicates whether the default schema only contains a single value column so we can accommodate it.
async fn create_or_alter_tables_on_demand(
&self,
requests: &mut RowInsertRequests,
@@ -474,6 +484,7 @@ impl Inserter {
auto_create_table_type: AutoCreateTableType,
statement_executor: &StatementExecutor,
accommodate_existing_schema: bool,
is_single_value: bool,
) -> Result<CreateAlterTableResult> {
let _timer = crate::metrics::CREATE_ALTER_ON_DEMAND
.with_label_values(&[auto_create_table_type.as_str()])
@@ -537,6 +548,7 @@ impl Inserter {
&table,
ctx,
accommodate_existing_schema,
is_single_value,
)? {
alter_tables.push(alter_expr);
}
@@ -815,12 +827,15 @@ impl Inserter {
/// When `accommodate_existing_schema` is true, it may modify the input `req` to
/// accommodate it with existing schema. See [`create_or_alter_tables_on_demand`](Self::create_or_alter_tables_on_demand)
/// for more details.
/// When `accommodate_existing_schema` is true and `is_single_value` is true, it also considers fields when modifying the
/// input `req`.
fn get_alter_table_expr_on_demand(
&self,
req: &mut RowInsertRequest,
table: &TableRef,
ctx: &QueryContextRef,
accommodate_existing_schema: bool,
is_single_value: bool,
) -> Result<Option<AlterTableExpr>> {
let catalog_name = ctx.current_catalog();
let schema_name = ctx.current_schema();
@@ -838,18 +853,20 @@ impl Inserter {
let table_schema = table.schema();
// Find timestamp column name
let ts_col_name = table_schema.timestamp_column().map(|c| c.name.clone());
// Find field column name if there is only one
// Find field column name if there is only one and `is_single_value` is true.
let mut field_col_name = None;
let mut multiple_field_cols = false;
table.field_columns().for_each(|col| {
if field_col_name.is_none() {
field_col_name = Some(col.name.clone());
} else {
multiple_field_cols = true;
if is_single_value {
let mut multiple_field_cols = false;
table.field_columns().for_each(|col| {
if field_col_name.is_none() {
field_col_name = Some(col.name.clone());
} else {
multiple_field_cols = true;
}
});
if multiple_field_cols {
field_col_name = None;
}
});
if multiple_field_cols {
field_col_name = None;
}
// Update column name in request schema for Timestamp/Field columns
@@ -875,11 +892,11 @@ impl Inserter {
}
}
// Remove from add_columns any column that is timestamp or field (if there is only one field column)
// Only keep columns that are tags, or fields when no single field column is accommodated.
add_columns.add_columns.retain(|col| {
let def = col.column_def.as_ref().unwrap();
def.semantic_type != SemanticType::Timestamp as i32
&& (def.semantic_type != SemanticType::Field as i32 && field_col_name.is_some())
def.semantic_type == SemanticType::Tag as i32
|| (def.semantic_type == SemanticType::Field as i32 && field_col_name.is_none())
});
if add_columns.add_columns.is_empty() {
@@ -1231,7 +1248,7 @@ mod tests {
)),
);
let alter_expr = inserter
.get_alter_table_expr_on_demand(&mut req, &table, &ctx, true)
.get_alter_table_expr_on_demand(&mut req, &table, &ctx, true, true)
.unwrap();
assert!(alter_expr.is_none());
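
The reworked `retain` above boils down to a small predicate: keep an add-column request only if it is a tag, or if it is a field while no single existing field column was picked for accommodation (`field_col_name` is `None`). A standalone sketch of just that predicate, with a local enum standing in for `SemanticType`:

// Hypothetical stand-in for api::v1::SemanticType; only the predicate logic is taken from the diff.
#[derive(PartialEq)]
enum Semantic {
    Tag,
    Field,
    Timestamp,
}

// `single_field_accommodated` corresponds to `field_col_name.is_some()` above.
fn keep_add_column(semantic: Semantic, single_field_accommodated: bool) -> bool {
    semantic == Semantic::Tag
        || (semantic == Semantic::Field && !single_field_accommodated)
}

fn main() {
    // Single-value protocols (e.g. prometheus remote write): the value column is
    // remapped to the existing field, so it never triggers an ALTER TABLE.
    assert!(!keep_add_column(Semantic::Field, true));
    assert!(!keep_add_column(Semantic::Timestamp, true));
    assert!(keep_add_column(Semantic::Tag, true));
    // Multi-value protocols (`is_single_value == false`): new fields still get added.
    assert!(keep_add_column(Semantic::Field, false));
}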

View File

@@ -20,6 +20,10 @@ use api::v1::meta::CreateFlowTask as PbCreateFlowTask;
use api::v1::{
column_def, AlterDatabaseExpr, AlterTableExpr, CreateFlowExpr, CreateTableExpr, CreateViewExpr,
};
#[cfg(feature = "enterprise")]
use api::v1::{
meta::CreateTriggerTask as PbCreateTriggerTask, CreateTriggerExpr as PbCreateTriggerExpr,
};
use catalog::CatalogManagerRef;
use chrono::Utc;
use common_catalog::consts::{is_readonly_schema, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
@@ -31,6 +35,8 @@ use common_meta::ddl::ExecutorContext;
use common_meta::instruction::CacheIdent;
use common_meta::key::schema_name::{SchemaName, SchemaNameKey};
use common_meta::key::NAME_PATTERN;
#[cfg(feature = "enterprise")]
use common_meta::rpc::ddl::trigger::CreateTriggerTask;
use common_meta::rpc::ddl::{
CreateFlowTask, DdlTask, DropFlowTask, DropViewTask, SubmitDdlTaskRequest,
SubmitDdlTaskResponse,
@@ -58,6 +64,8 @@ use session::table_name::table_idents_to_full_name;
use snafu::{ensure, OptionExt, ResultExt};
use sql::parser::{ParseOptions, ParserContext};
use sql::statements::alter::{AlterDatabase, AlterTable};
#[cfg(feature = "enterprise")]
use sql::statements::create::trigger::CreateTrigger;
use sql::statements::create::{
CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions,
};
@@ -347,10 +355,43 @@ impl StatementExecutor {
#[tracing::instrument(skip_all)]
pub async fn create_trigger(
&self,
_stmt: sql::statements::create::trigger::CreateTrigger,
_query_context: QueryContextRef,
stmt: CreateTrigger,
query_context: QueryContextRef,
) -> Result<Output> {
crate::error::UnsupportedTriggerSnafu {}.fail()
let expr = expr_helper::to_create_trigger_task_expr(stmt, &query_context)?;
self.create_trigger_inner(expr, query_context).await
}
#[cfg(feature = "enterprise")]
pub async fn create_trigger_inner(
&self,
expr: PbCreateTriggerExpr,
query_context: QueryContextRef,
) -> Result<Output> {
self.create_trigger_procedure(expr, query_context).await?;
Ok(Output::new_with_affected_rows(0))
}
#[cfg(feature = "enterprise")]
async fn create_trigger_procedure(
&self,
expr: PbCreateTriggerExpr,
query_context: QueryContextRef,
) -> Result<SubmitDdlTaskResponse> {
let task = CreateTriggerTask::try_from(PbCreateTriggerTask {
create_trigger: Some(expr),
})
.context(error::InvalidExprSnafu)?;
let request = SubmitDdlTaskRequest {
query_context,
task: DdlTask::new_create_trigger(task),
};
self.procedure_executor
.submit_ddl_task(&ExecutorContext::default(), request)
.await
.context(error::ExecuteDdlSnafu)
}
#[tracing::instrument(skip_all)]

View File

@@ -13,6 +13,7 @@
// limitations under the License.
#![allow(dead_code)]
pub mod ctx_req;
pub mod field;
pub mod processor;
pub mod transform;
@@ -153,21 +154,39 @@ impl DispatchedTo {
/// The result of pipeline execution
#[derive(Debug)]
pub enum PipelineExecOutput {
Transformed((Row, Option<String>)),
// table_suffix, ts_key -> unit
AutoTransform(Option<String>, HashMap<String, TimeUnit>),
Transformed(TransformedOutput),
AutoTransform(AutoTransformOutput),
DispatchedTo(DispatchedTo),
}
#[derive(Debug)]
pub struct TransformedOutput {
pub opt: String,
pub row: Row,
pub table_suffix: Option<String>,
}
#[derive(Debug)]
pub struct AutoTransformOutput {
pub table_suffix: Option<String>,
// ts_column_name -> unit
pub ts_unit_map: HashMap<String, TimeUnit>,
}
impl PipelineExecOutput {
// Note: This is a test only function, do not use it in production.
pub fn into_transformed(self) -> Option<(Row, Option<String>)> {
if let Self::Transformed(o) = self {
Some(o)
if let Self::Transformed(TransformedOutput {
row, table_suffix, ..
}) = self
{
Some((row, table_suffix))
} else {
None
}
}
// Note: This is a test only function, do not use it in production.
pub fn into_dispatched(self) -> Option<DispatchedTo> {
if let Self::DispatchedTo(d) = self {
Some(d)
@@ -224,9 +243,13 @@ impl Pipeline {
}
if let Some(transformer) = self.transformer() {
let row = transformer.transform_mut(val)?;
let (opt, row) = transformer.transform_mut(val)?;
let table_suffix = self.tablesuffix.as_ref().and_then(|t| t.apply(val));
Ok(PipelineExecOutput::Transformed((row, table_suffix)))
Ok(PipelineExecOutput::Transformed(TransformedOutput {
opt,
row,
table_suffix,
}))
} else {
let table_suffix = self.tablesuffix.as_ref().and_then(|t| t.apply(val));
let mut ts_unit_map = HashMap::with_capacity(4);
@@ -238,7 +261,10 @@ impl Pipeline {
}
}
}
Ok(PipelineExecOutput::AutoTransform(table_suffix, ts_unit_map))
Ok(PipelineExecOutput::AutoTransform(AutoTransformOutput {
table_suffix,
ts_unit_map,
}))
}
}
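
Callers now destructure the two payload structs instead of tuples. The sketch below mirrors the match in `run_pipeline` further down, with local stand-ins for Row, TimeUnit, and DispatchedTo so it stays self-contained (the DispatchedTo field name here is hypothetical):

use std::collections::HashMap;

// Local stand-ins; the real types live in the pipeline and greptime-proto crates.
struct Row;
enum TimeUnit { Millisecond, Nanosecond }
struct DispatchedTo { table_part: String } // hypothetical field, for illustration only

struct TransformedOutput { opt: String, row: Row, table_suffix: Option<String> }
struct AutoTransformOutput { table_suffix: Option<String>, ts_unit_map: HashMap<String, TimeUnit> }

enum PipelineExecOutput {
    Transformed(TransformedOutput),
    AutoTransform(AutoTransformOutput),
    DispatchedTo(DispatchedTo),
}

fn route(output: PipelineExecOutput) -> String {
    match output {
        PipelineExecOutput::Transformed(TransformedOutput { opt, row: _, table_suffix }) => {
            // Rows are grouped by (insert options, resolved table name) downstream.
            format!("transformed: opt={opt:?}, suffix={table_suffix:?}")
        }
        PipelineExecOutput::AutoTransform(AutoTransformOutput { table_suffix, ts_unit_map }) => {
            format!("auto: suffix={table_suffix:?}, ts columns={}", ts_unit_map.len())
        }
        PipelineExecOutput::DispatchedTo(d) => format!("dispatched to {}", d.table_part),
    }
}

fn main() {
    let out = route(PipelineExecOutput::Transformed(TransformedOutput {
        opt: "append_mode=true,ttl=1d".to_string(),
        row: Row,
        table_suffix: Some("_http".to_string()),
    }));
    println!("{out}");
}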

View File

@@ -0,0 +1,153 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::hash_map::IntoIter;
use std::collections::BTreeMap;
use std::sync::Arc;
use ahash::{HashMap, HashMapExt};
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
use itertools::Itertools;
use session::context::{QueryContext, QueryContextRef};
use crate::PipelineMap;
const DEFAULT_OPT: &str = "";
pub const PIPELINE_HINT_KEYS: [&str; 6] = [
"greptime_auto_create_table",
"greptime_ttl",
"greptime_append_mode",
"greptime_merge_mode",
"greptime_physical_table",
"greptime_skip_wal",
];
const PIPELINE_HINT_PREFIX: &str = "greptime_";
// Remove hints from the pipeline context and form an option string,
// e.g.: skip_wal=true,ttl=1d
pub fn from_pipeline_map_to_opt(pipeline_map: &mut PipelineMap) -> String {
let mut btreemap = BTreeMap::new();
for k in PIPELINE_HINT_KEYS {
if let Some(v) = pipeline_map.remove(k) {
btreemap.insert(k, v.to_str_value());
}
}
btreemap
.into_iter()
.map(|(k, v)| format!("{}={}", k.replace(PIPELINE_HINT_PREFIX, ""), v))
.join(",")
}
// split the option string back to a map
fn from_opt_to_map(opt: &str) -> HashMap<&str, &str> {
opt.split(',')
.filter_map(|s| {
s.split_once("=")
.filter(|(k, v)| !k.is_empty() && !v.is_empty())
})
.collect()
}
// ContextReq is a collection of row insert requests with different options.
// The default option is an empty string.
// Because options are set in the query context, we have to split them into sequential calls,
// e.g.:
// {
// "skip_wal=true,ttl=1d": [RowInsertRequest],
// "ttl=1d": [RowInsertRequest],
// }
#[derive(Debug, Default)]
pub struct ContextReq {
req: HashMap<String, Vec<RowInsertRequest>>,
}
impl ContextReq {
pub fn from_opt_map(opt_map: HashMap<String, Rows>, table_name: String) -> Self {
Self {
req: opt_map
.into_iter()
.map(|(opt, rows)| {
(
opt,
vec![RowInsertRequest {
table_name: table_name.clone(),
rows: Some(rows),
}],
)
})
.collect::<HashMap<String, Vec<RowInsertRequest>>>(),
}
}
pub fn default_opt_with_reqs(reqs: Vec<RowInsertRequest>) -> Self {
let mut req_map = HashMap::new();
req_map.insert(DEFAULT_OPT.to_string(), reqs);
Self { req: req_map }
}
pub fn add_rows(&mut self, opt: String, req: RowInsertRequest) {
self.req.entry(opt).or_default().push(req);
}
pub fn merge(&mut self, other: Self) {
for (opt, req) in other.req {
self.req.entry(opt).or_default().extend(req);
}
}
pub fn as_req_iter(self, ctx: QueryContextRef) -> ContextReqIter {
let ctx = (*ctx).clone();
ContextReqIter {
opt_req: self.req.into_iter(),
ctx_template: ctx,
}
}
pub fn all_req(self) -> impl Iterator<Item = RowInsertRequest> {
self.req.into_iter().flat_map(|(_, req)| req)
}
pub fn ref_all_req(&self) -> impl Iterator<Item = &RowInsertRequest> {
self.req.values().flatten()
}
}
// ContextReqIter is an iterator that iterates over the ContextReq.
// The context template is cloned from the original query context.
// For each option it clones the template, applies the option's key/value pairs to the clone,
// and then returns that context together with the row insert requests for the actual insert.
pub struct ContextReqIter {
opt_req: IntoIter<String, Vec<RowInsertRequest>>,
ctx_template: QueryContext,
}
impl Iterator for ContextReqIter {
type Item = (QueryContextRef, RowInsertRequests);
fn next(&mut self) -> Option<Self::Item> {
let (opt, req_vec) = self.opt_req.next()?;
let opt_map = from_opt_to_map(&opt);
let mut ctx = self.ctx_template.clone();
for (k, v) in opt_map {
ctx.set_extension(k, v);
}
Some((Arc::new(ctx), RowInsertRequests { inserts: req_vec }))
}
}
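
A small usage sketch (compiled against the crates in this tree, not standalone): two requests land under different option strings and are then replayed as one insert call per option group. The table names and hint values below are made up, and `rows` is left as None to keep it short:

use api::v1::{RowInsertRequest, RowInsertRequests};
use pipeline::ContextReq;
use session::context::QueryContext;

fn split_by_options() {
    let mut req = ContextReq::default();
    req.add_rows(
        "ttl=1d".to_string(),
        RowInsertRequest { table_name: "d_table_http".to_string(), rows: None },
    );
    req.add_rows(
        String::new(), // DEFAULT_OPT: no hints attached
        RowInsertRequest { table_name: "d_table_db".to_string(), rows: None },
    );

    // One cloned query context (and thus one insert call) per distinct option string;
    // the option's key/value pairs are set as extensions on that context.
    for (ctx, inserts) in req.as_req_iter(QueryContext::arc()) {
        let _: RowInsertRequests = inserts;
        let _ = ctx;
    }
}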

View File

@@ -40,7 +40,7 @@ use crate::etl::transform::index::Index;
use crate::etl::transform::{Transform, Transforms};
use crate::etl::value::{Timestamp, Value};
use crate::etl::PipelineMap;
use crate::PipelineContext;
use crate::{from_pipeline_map_to_opt, PipelineContext};
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
@@ -185,13 +185,15 @@ impl GreptimeTransformer {
}
}
pub fn transform_mut(&self, val: &mut PipelineMap) -> Result<Row> {
pub fn transform_mut(&self, pipeline_map: &mut PipelineMap) -> Result<(String, Row)> {
let opt = from_pipeline_map_to_opt(pipeline_map);
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
let mut output_index = 0;
for transform in self.transforms.iter() {
for field in transform.fields.iter() {
let index = field.input_field();
match val.get(index) {
match pipeline_map.get(index) {
Some(v) => {
let value_data = coerce_value(v, transform)?;
// every transform fields has only one output field
@@ -217,7 +219,7 @@ impl GreptimeTransformer {
output_index += 1;
}
}
Ok(Row { values })
Ok((opt, Row { values }))
}
pub fn transforms(&self) -> &Transforms {
@@ -517,8 +519,7 @@ fn resolve_value(
fn identity_pipeline_inner(
pipeline_maps: Vec<PipelineMap>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<(SchemaInfo, Vec<Row>)> {
let mut rows = Vec::with_capacity(pipeline_maps.len());
) -> Result<(SchemaInfo, HashMap<String, Vec<Row>>)> {
let mut schema_info = SchemaInfo::default();
let custom_ts = pipeline_ctx.pipeline_definition.get_custom_ts();
@@ -539,20 +540,30 @@ fn identity_pipeline_inner(
options: None,
});
for values in pipeline_maps {
let row = values_to_row(&mut schema_info, values, pipeline_ctx)?;
rows.push(row);
let mut opt_map = HashMap::new();
let len = pipeline_maps.len();
for mut pipeline_map in pipeline_maps {
let opt = from_pipeline_map_to_opt(&mut pipeline_map);
let row = values_to_row(&mut schema_info, pipeline_map, pipeline_ctx)?;
opt_map
.entry(opt)
.or_insert_with(|| Vec::with_capacity(len))
.push(row);
}
let column_count = schema_info.schema.len();
for row in rows.iter_mut() {
let diff = column_count - row.values.len();
for _ in 0..diff {
row.values.push(GreptimeValue { value_data: None });
for (_, row) in opt_map.iter_mut() {
for row in row.iter_mut() {
let diff = column_count - row.values.len();
for _ in 0..diff {
row.values.push(GreptimeValue { value_data: None });
}
}
}
Ok((schema_info, rows))
Ok((schema_info, opt_map))
}
/// Identity pipeline for Greptime
@@ -567,7 +578,7 @@ pub fn identity_pipeline(
array: Vec<PipelineMap>,
table: Option<Arc<table::Table>>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<Rows> {
) -> Result<HashMap<String, Rows>> {
let input = if pipeline_ctx.pipeline_param.flatten_json_object() {
array
.into_iter()
@@ -577,7 +588,7 @@ pub fn identity_pipeline(
array
};
identity_pipeline_inner(input, pipeline_ctx).map(|(mut schema, rows)| {
identity_pipeline_inner(input, pipeline_ctx).map(|(mut schema, opt_map)| {
if let Some(table) = table {
let table_info = table.table_info();
for tag_name in table_info.meta.row_key_column_names() {
@@ -586,10 +597,19 @@ pub fn identity_pipeline(
}
}
}
Rows {
schema: schema.schema,
rows,
}
opt_map
.into_iter()
.map(|(opt, rows)| {
(
opt,
Rows {
schema: schema.schema.clone(),
rows,
},
)
})
.collect::<HashMap<String, Rows>>()
})
}
@@ -739,7 +759,9 @@ mod tests {
];
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
assert!(rows.is_ok());
let rows = rows.unwrap();
let mut rows = rows.unwrap();
assert!(rows.len() == 1);
let rows = rows.remove("").unwrap();
assert_eq!(rows.schema.len(), 8);
assert_eq!(rows.rows.len(), 2);
assert_eq!(8, rows.rows[0].values.len());
@@ -769,12 +791,16 @@ mod tests {
let tag_column_names = ["name".to_string(), "address".to_string()];
let rows = identity_pipeline_inner(json_array_to_map(array).unwrap(), &pipeline_ctx)
.map(|(mut schema, rows)| {
.map(|(mut schema, mut rows)| {
for name in tag_column_names {
if let Some(index) = schema.index.get(&name) {
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
}
}
assert!(rows.len() == 1);
let rows = rows.remove("").unwrap();
Rows {
schema: schema.schema,
rows,

View File

@@ -19,14 +19,16 @@ mod manager;
mod metrics;
mod tablesuffix;
pub use etl::ctx_req::{from_pipeline_map_to_opt, ContextReq};
pub use etl::processor::Processor;
pub use etl::transform::transformer::greptime::{GreptimePipelineParams, SchemaInfo};
pub use etl::transform::transformer::identity_pipeline;
pub use etl::transform::GreptimeTransformer;
pub use etl::value::{Array, Map, Value};
pub use etl::{
json_array_to_map, json_to_map, parse, simd_json_array_to_map, simd_json_to_map, Content,
DispatchedTo, Pipeline, PipelineExecOutput, PipelineMap,
json_array_to_map, json_to_map, parse, simd_json_array_to_map, simd_json_to_map,
AutoTransformOutput, Content, DispatchedTo, Pipeline, PipelineExecOutput, PipelineMap,
TransformedOutput,
};
pub use manager::{
pipeline_operator, table, util, IdentityTimeIndex, PipelineContext, PipelineDefinition,

View File

@@ -236,6 +236,7 @@ impl PipelineTable {
Self::query_ctx(&table_info),
&self.statement_executor,
false,
false,
)
.await
.context(InsertPipelineSnafu)?;

View File

@@ -38,6 +38,7 @@ use common_telemetry::{debug, error, tracing, warn};
use common_time::timezone::parse_timezone;
use futures_util::StreamExt;
use session::context::{QueryContext, QueryContextBuilder, QueryContextRef};
use session::hints::READ_PREFERENCE_HINT;
use snafu::{OptionExt, ResultExt};
use table::metadata::TableId;
use tokio::sync::mpsc;
@@ -49,7 +50,6 @@ use crate::error::{
};
use crate::grpc::flight::{PutRecordBatchRequest, PutRecordBatchRequestStream};
use crate::grpc::TonicResult;
use crate::hint_headers::READ_PREFERENCE_HINT;
use crate::metrics;
use crate::metrics::{METRIC_AUTH_FAILURE, METRIC_SERVER_GRPC_DB_REQUEST_TIMER};
use crate::query_handler::grpc::ServerGrpcQueryHandlerRef;

View File

@@ -13,23 +13,9 @@
// limitations under the License.
use http::HeaderMap;
use session::hints::{HINTS_KEY, HINTS_KEY_PREFIX, HINT_KEYS};
use tonic::metadata::MetadataMap;
// For the given format: `x-greptime-hints: auto_create_table=true, ttl=7d`
pub const HINTS_KEY: &str = "x-greptime-hints";
pub const READ_PREFERENCE_HINT: &str = "read_preference";
const HINT_KEYS: [&str; 7] = [
"x-greptime-hint-auto_create_table",
"x-greptime-hint-ttl",
"x-greptime-hint-append_mode",
"x-greptime-hint-merge_mode",
"x-greptime-hint-physical_table",
"x-greptime-hint-skip_wal",
"x-greptime-hint-read_preference",
];
pub(crate) fn extract_hints<T: ToHeaderMap>(headers: &T) -> Vec<(String, String)> {
let mut hints = Vec::new();
if let Some(value_str) = headers.get(HINTS_KEY) {
@@ -44,7 +30,7 @@ pub(crate) fn extract_hints<T: ToHeaderMap>(headers: &T) -> Vec<(String, String)
}
for key in HINT_KEYS.iter() {
if let Some(value) = headers.get(key) {
let new_key = key.replace("x-greptime-hint-", "");
let new_key = key.replace(HINTS_KEY_PREFIX, "");
hints.push((new_key, value.trim().to_string()));
}
}
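
The extraction logic itself is unchanged; only the constants moved to `session::hints`. A rough sketch of the two header forms it accepts, using a plain `HashMap` in place of the real HTTP/gRPC header maps (the real function is generic over a `ToHeaderMap` trait, and its parsing of the combined header may differ in detail from this mirror of the documented format):

use std::collections::HashMap;

use session::hints::{HINTS_KEY, HINTS_KEY_PREFIX, HINT_KEYS};

fn hints_from(headers: &HashMap<String, String>) -> Vec<(String, String)> {
    let mut hints = Vec::new();
    // Combined form: `x-greptime-hints: auto_create_table=true, ttl=7d`.
    if let Some(value) = headers.get(HINTS_KEY) {
        for kv in value.split(',') {
            if let Some((k, v)) = kv.split_once('=') {
                hints.push((k.trim().to_string(), v.trim().to_string()));
            }
        }
    }
    // Per-key form: `x-greptime-hint-ttl: 7d` becomes ("ttl", "7d").
    for key in HINT_KEYS.iter() {
        if let Some(value) = headers.get(*key) {
            hints.push((key.replace(HINTS_KEY_PREFIX, ""), value.trim().to_string()));
        }
    }
    hints
}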

View File

@@ -18,7 +18,6 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::Instant;
use api::v1::RowInsertRequests;
use async_trait::async_trait;
use axum::body::Bytes;
use axum::extract::{FromRequest, Multipart, Path, Query, Request, State};
@@ -34,7 +33,9 @@ use datatypes::value::column_data_to_json;
use headers::ContentType;
use lazy_static::lazy_static;
use pipeline::util::to_pipeline_version;
use pipeline::{GreptimePipelineParams, PipelineContext, PipelineDefinition, PipelineMap};
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, PipelineMap,
};
use serde::{Deserialize, Serialize};
use serde_json::{json, Deserializer, Map, Value};
use session::context::{Channel, QueryContext, QueryContextRef};
@@ -345,7 +346,7 @@ async fn dryrun_pipeline_inner(
let name_key = "name";
let results = results
.into_iter()
.all_req()
.filter_map(|row| {
if let Some(rows) = row.rows {
let table_name = row.table_name;
@@ -798,7 +799,7 @@ pub(crate) async fn ingest_logs_inner(
let db = query_ctx.get_db_string();
let exec_timer = std::time::Instant::now();
let mut insert_requests = Vec::with_capacity(log_ingest_requests.len());
let mut req = ContextReq::default();
let pipeline_params = GreptimePipelineParams::from_params(
headers
@@ -811,36 +812,42 @@ pub(crate) async fn ingest_logs_inner(
let requests =
run_pipeline(&handler, &pipeline_ctx, pipeline_req, &query_ctx, true).await?;
insert_requests.extend(requests);
req.merge(requests);
}
let output = handler
.insert(
RowInsertRequests {
inserts: insert_requests,
},
query_ctx,
)
.await;
let mut outputs = Vec::new();
let mut total_rows: u64 = 0;
let mut fail = false;
for (temp_ctx, act_req) in req.as_req_iter(query_ctx) {
let output = handler.insert(act_req, temp_ctx).await;
if let Ok(Output {
data: OutputData::AffectedRows(rows),
meta: _,
}) = &output
{
if let Ok(Output {
data: OutputData::AffectedRows(rows),
meta: _,
}) = &output
{
total_rows += *rows as u64;
} else {
fail = true;
}
outputs.push(output);
}
if total_rows > 0 {
METRIC_HTTP_LOGS_INGESTION_COUNTER
.with_label_values(&[db.as_str()])
.inc_by(*rows as u64);
.inc_by(total_rows);
METRIC_HTTP_LOGS_INGESTION_ELAPSED
.with_label_values(&[db.as_str(), METRIC_SUCCESS_VALUE])
.observe(exec_timer.elapsed().as_secs_f64());
} else {
}
if fail {
METRIC_HTTP_LOGS_INGESTION_ELAPSED
.with_label_values(&[db.as_str(), METRIC_FAILURE_VALUE])
.observe(exec_timer.elapsed().as_secs_f64());
}
let response = GreptimedbV1Response::from_output(vec![output])
let response = GreptimedbV1Response::from_output(outputs)
.await
.with_execution_time(exec_timer.elapsed().as_millis() as u64);
Ok(response)

View File

@@ -166,7 +166,7 @@ pub async fn logs(
resp_body: ExportLogsServiceResponse {
partial_success: None,
},
write_cost: o.meta.cost,
write_cost: o.iter().map(|o| o.meta.cost).sum(),
})
}

View File

@@ -15,7 +15,6 @@
use std::sync::Arc;
use api::prom_store::remote::ReadRequest;
use api::v1::RowInsertRequests;
use axum::body::Bytes;
use axum::extract::{Query, State};
use axum::http::{header, HeaderValue, StatusCode};
@@ -29,7 +28,7 @@ use hyper::HeaderMap;
use lazy_static::lazy_static;
use object_pool::Pool;
use pipeline::util::to_pipeline_version;
use pipeline::PipelineDefinition;
use pipeline::{ContextReq, PipelineDefinition};
use prost::Message;
use serde::{Deserialize, Serialize};
use session::context::{Channel, QueryContext};
@@ -133,18 +132,24 @@ pub async fn remote_write(
processor.set_pipeline(pipeline_handler, query_ctx.clone(), pipeline_def);
}
let (request, samples) =
let req =
decode_remote_write_request(is_zstd, body, prom_validation_mode, &mut processor).await?;
let output = prom_store_handler
.write(request, query_ctx, prom_store_with_metric_engine)
.await?;
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(samples as u64);
Ok((
StatusCode::NO_CONTENT,
write_cost_header_map(output.meta.cost),
)
.into_response())
let mut cost = 0;
for (temp_ctx, reqs) in req.as_req_iter(query_ctx) {
let cnt: u64 = reqs
.inserts
.iter()
.filter_map(|s| s.rows.as_ref().map(|r| r.rows.len() as u64))
.sum();
let output = prom_store_handler
.write(reqs, temp_ctx, prom_store_with_metric_engine)
.await?;
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(cnt);
cost += output.meta.cost;
}
Ok((StatusCode::NO_CONTENT, write_cost_header_map(cost)).into_response())
}
impl IntoResponse for PromStoreResponse {
@@ -202,7 +207,7 @@ async fn decode_remote_write_request(
body: Bytes,
prom_validation_mode: PromValidationMode,
processor: &mut PromSeriesProcessor,
) -> Result<(RowInsertRequests, usize)> {
) -> Result<ContextReq> {
let _timer = crate::metrics::METRIC_HTTP_PROM_STORE_DECODE_ELAPSED.start_timer();
// due to vmagent's limitation, there is a chance that vmagent is
@@ -227,7 +232,8 @@ async fn decode_remote_write_request(
if processor.use_pipeline {
processor.exec_pipeline().await
} else {
Ok(request.as_row_insert_requests())
let reqs = request.as_row_insert_requests();
Ok(ContextReq::default_opt_with_reqs(reqs))
}
}

View File

@@ -183,7 +183,7 @@ fn select_variable(query: &str, query_context: QueryContextRef) -> Option<Output
// get value of variables from known sources or fallback to defaults
let value = match var_as[0] {
"time_zone" => query_context.timezone().to_string(),
"session.time_zone" | "time_zone" => query_context.timezone().to_string(),
"system_time_zone" => system_timezone_name(),
_ => VAR_VALUES
.get(var_as[0])

View File

@@ -18,13 +18,15 @@ use api::v1::column_data_type_extension::TypeExt;
use api::v1::value::ValueData;
use api::v1::{
ColumnDataType, ColumnDataTypeExtension, ColumnOptions, ColumnSchema, JsonTypeExtension, Row,
RowInsertRequest, RowInsertRequests, Rows, SemanticType, Value as GreptimeValue,
RowInsertRequest, Rows, SemanticType, Value as GreptimeValue,
};
use jsonb::{Number as JsonbNumber, Value as JsonbValue};
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
use opentelemetry_proto::tonic::common::v1::{any_value, AnyValue, InstrumentationScope, KeyValue};
use opentelemetry_proto::tonic::logs::v1::{LogRecord, ResourceLogs, ScopeLogs};
use pipeline::{GreptimePipelineParams, PipelineContext, PipelineWay, SchemaInfo, SelectInfo};
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineWay, SchemaInfo, SelectInfo,
};
use serde_json::{Map, Value};
use session::context::QueryContextRef;
use snafu::{ensure, ResultExt};
@@ -55,21 +57,16 @@ pub async fn to_grpc_insert_requests(
table_name: String,
query_ctx: &QueryContextRef,
pipeline_handler: PipelineHandlerRef,
) -> Result<(RowInsertRequests, usize)> {
) -> Result<ContextReq> {
match pipeline {
PipelineWay::OtlpLogDirect(select_info) => {
let rows = parse_export_logs_service_request_to_rows(request, select_info)?;
let len = rows.rows.len();
let insert_request = RowInsertRequest {
rows: Some(rows),
table_name,
};
Ok((
RowInsertRequests {
inserts: vec![insert_request],
},
len,
))
Ok(ContextReq::default_opt_with_reqs(vec![insert_request]))
}
PipelineWay::Pipeline(pipeline_def) => {
let data = parse_export_logs_service_request(request);
@@ -77,7 +74,7 @@ pub async fn to_grpc_insert_requests(
let pipeline_ctx =
PipelineContext::new(&pipeline_def, &pipeline_params, query_ctx.channel());
let inserts = run_pipeline(
run_pipeline(
&pipeline_handler,
&pipeline_ctx,
PipelineIngestRequest {
@@ -87,20 +84,7 @@ pub async fn to_grpc_insert_requests(
query_ctx,
true,
)
.await?;
let len = inserts
.iter()
.map(|insert| {
insert
.rows
.as_ref()
.map(|rows| rows.rows.len())
.unwrap_or(0)
})
.sum();
let insert_requests = RowInsertRequests { inserts };
Ok((insert_requests, len))
.await
}
_ => NotSupportedSnafu {
feat: "Unsupported pipeline for logs",

View File

@@ -20,8 +20,9 @@ use api::v1::{RowInsertRequest, Rows};
use itertools::Itertools;
use pipeline::error::AutoTransformOneTimestampSnafu;
use pipeline::{
DispatchedTo, IdentityTimeIndex, Pipeline, PipelineContext, PipelineDefinition,
PipelineExecOutput, PipelineMap, GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
AutoTransformOutput, ContextReq, DispatchedTo, IdentityTimeIndex, Pipeline, PipelineContext,
PipelineDefinition, PipelineExecOutput, PipelineMap, TransformedOutput,
GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
};
use session::context::{Channel, QueryContextRef};
use snafu::{OptionExt, ResultExt};
@@ -66,7 +67,7 @@ pub(crate) async fn run_pipeline(
pipeline_req: PipelineIngestRequest,
query_ctx: &QueryContextRef,
is_top_level: bool,
) -> Result<Vec<RowInsertRequest>> {
) -> Result<ContextReq> {
if pipeline_ctx.pipeline_definition.is_identity() {
run_identity_pipeline(handler, pipeline_ctx, pipeline_req, query_ctx).await
} else {
@@ -79,7 +80,7 @@ async fn run_identity_pipeline(
pipeline_ctx: &PipelineContext<'_>,
pipeline_req: PipelineIngestRequest,
query_ctx: &QueryContextRef,
) -> Result<Vec<RowInsertRequest>> {
) -> Result<ContextReq> {
let PipelineIngestRequest {
table: table_name,
values: data_array,
@@ -93,12 +94,7 @@ async fn run_identity_pipeline(
.context(CatalogSnafu)?
};
pipeline::identity_pipeline(data_array, table, pipeline_ctx)
.map(|rows| {
vec![RowInsertRequest {
rows: Some(rows),
table_name,
}]
})
.map(|opt_map| ContextReq::from_opt_map(opt_map, table_name))
.context(PipelineSnafu)
}
@@ -108,7 +104,7 @@ async fn run_custom_pipeline(
pipeline_req: PipelineIngestRequest,
query_ctx: &QueryContextRef,
is_top_level: bool,
) -> Result<Vec<RowInsertRequest>> {
) -> Result<ContextReq> {
let db = query_ctx.get_db_string();
let pipeline = get_pipeline(pipeline_ctx.pipeline_definition, handler, query_ctx).await?;
@@ -135,17 +131,24 @@ async fn run_custom_pipeline(
.context(PipelineSnafu)?;
match r {
PipelineExecOutput::Transformed((row, table_suffix)) => {
PipelineExecOutput::Transformed(TransformedOutput {
opt,
row,
table_suffix,
}) => {
let act_table_name = table_suffix_to_table_name(&table_name, table_suffix);
push_to_map!(transformed_map, act_table_name, row, arr_len);
push_to_map!(transformed_map, (opt, act_table_name), row, arr_len);
}
PipelineExecOutput::AutoTransform(table_suffix, ts_keys) => {
PipelineExecOutput::AutoTransform(AutoTransformOutput {
table_suffix,
ts_unit_map,
}) => {
let act_table_name = table_suffix_to_table_name(&table_name, table_suffix);
push_to_map!(auto_map, act_table_name.clone(), pipeline_map, arr_len);
auto_map_ts_keys
.entry(act_table_name)
.or_insert_with(HashMap::new)
.extend(ts_keys);
.extend(ts_unit_map);
}
PipelineExecOutput::DispatchedTo(dispatched_to) => {
push_to_map!(dispatched, dispatched_to, pipeline_map, arr_len);
@@ -153,7 +156,7 @@ async fn run_custom_pipeline(
}
}
let mut results = Vec::new();
let mut results = ContextReq::default();
if let Some(s) = pipeline.schemas() {
// transformed
@@ -161,14 +164,17 @@ async fn run_custom_pipeline(
// if the current pipeline generates some transformed results, build them as
// `RowInsertRequest` and append to results. If the pipeline doesn't
// have dispatch, this will be the only output of the pipeline.
for (table_name, rows) in transformed_map {
results.push(RowInsertRequest {
rows: Some(Rows {
rows,
schema: s.clone(),
}),
table_name,
});
for ((opt, table_name), rows) in transformed_map {
results.add_rows(
opt,
RowInsertRequest {
rows: Some(Rows {
rows,
schema: s.clone(),
}),
table_name,
},
);
}
} else {
// auto map
@@ -205,7 +211,7 @@ async fn run_custom_pipeline(
)
.await?;
results.extend(reqs);
results.merge(reqs);
}
}
@@ -240,7 +246,7 @@ async fn run_custom_pipeline(
))
.await?;
results.extend(requests);
results.merge(requests);
}
if is_top_level {

View File

@@ -18,10 +18,7 @@ use std::string::ToString;
use ahash::HashMap;
use api::prom_store::remote::Sample;
use api::v1::value::ValueData;
use api::v1::{
ColumnDataType, ColumnSchema, Row, RowInsertRequest, RowInsertRequests, Rows, SemanticType,
Value,
};
use api::v1::{ColumnDataType, ColumnSchema, Row, RowInsertRequest, Rows, SemanticType, Value};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use prost::DecodeError;
@@ -55,17 +52,11 @@ impl TablesBuilder {
}
/// Converts [TablesBuilder] to [RowInsertRequests] and row numbers and clears inner states.
pub(crate) fn as_insert_requests(&mut self) -> (RowInsertRequests, usize) {
let mut total_rows = 0;
let inserts = self
.tables
pub(crate) fn as_insert_requests(&mut self) -> Vec<RowInsertRequest> {
self.tables
.drain()
.map(|(name, mut table)| {
total_rows += table.num_rows();
table.as_row_insert_request(name)
})
.collect();
(RowInsertRequests { inserts }, total_rows)
.map(|(name, mut table)| table.as_row_insert_request(name))
.collect()
}
}
@@ -116,11 +107,6 @@ impl TableBuilder {
}
}
/// Total number of rows inside table builder.
fn num_rows(&self) -> usize {
self.rows.len()
}
/// Adds a set of labels and samples to table builder.
pub(crate) fn add_labels_and_samples(
&mut self,

View File

@@ -18,11 +18,13 @@ use std::ops::Deref;
use std::slice;
use api::prom_store::remote::Sample;
use api::v1::RowInsertRequests;
use api::v1::RowInsertRequest;
use bytes::{Buf, Bytes};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use common_telemetry::debug;
use pipeline::{GreptimePipelineParams, PipelineContext, PipelineDefinition, PipelineMap, Value};
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, PipelineMap, Value,
};
use prost::encoding::message::merge;
use prost::encoding::{decode_key, decode_varint, WireType};
use prost::DecodeError;
@@ -267,7 +269,7 @@ impl Clear for PromWriteRequest {
}
impl PromWriteRequest {
pub fn as_row_insert_requests(&mut self) -> (RowInsertRequests, usize) {
pub fn as_row_insert_requests(&mut self) -> Vec<RowInsertRequest> {
self.table_data.as_insert_requests()
}
@@ -409,9 +411,7 @@ impl PromSeriesProcessor {
Ok(())
}
pub(crate) async fn exec_pipeline(
&mut self,
) -> crate::error::Result<(RowInsertRequests, usize)> {
pub(crate) async fn exec_pipeline(&mut self) -> crate::error::Result<ContextReq> {
// prepare params
let handler = self.pipeline_handler.as_ref().context(InternalSnafu {
err_msg: "pipeline handler is not set",
@@ -425,10 +425,9 @@ impl PromSeriesProcessor {
})?;
let pipeline_ctx = PipelineContext::new(pipeline_def, &pipeline_param, query_ctx.channel());
let mut size = 0;
// run pipeline
let mut inserts = Vec::with_capacity(self.table_values.len());
let mut req = ContextReq::default();
for (table_name, pipeline_maps) in self.table_values.iter_mut() {
let pipeline_req = PipelineIngestRequest {
table: table_name.clone(),
@@ -436,16 +435,10 @@ impl PromSeriesProcessor {
};
let row_req =
run_pipeline(handler, &pipeline_ctx, pipeline_req, query_ctx, true).await?;
size += row_req
.iter()
.map(|rq| rq.rows.as_ref().map(|r| r.rows.len()).unwrap_or(0))
.sum::<usize>();
inserts.extend(row_req);
req.merge(row_req);
}
let row_insert_requests = RowInsertRequests { inserts };
Ok((row_insert_requests, size))
Ok(req)
}
}
@@ -489,7 +482,13 @@ mod tests {
prom_write_request
.merge(data.clone(), PromValidationMode::Strict, &mut p)
.unwrap();
let (prom_rows, samples) = prom_write_request.as_row_insert_requests();
let req = prom_write_request.as_row_insert_requests();
let samples = req
.iter()
.filter_map(|r| r.rows.as_ref().map(|r| r.rows.len()))
.sum::<usize>();
let prom_rows = RowInsertRequests { inserts: req };
assert_eq!(expected_samples, samples);
assert_eq!(expected_rows.inserts.len(), prom_rows.inserts.len());

View File

@@ -122,7 +122,7 @@ pub trait OpenTelemetryProtocolHandler: PipelineHandler {
pipeline_params: GreptimePipelineParams,
table_name: String,
ctx: QueryContextRef,
) -> Result<Output>;
) -> Result<Vec<Output>>;
}
/// PipelineHandler is responsible for handling pipeline related requests.

src/session/src/hints.rs (new file, 29 lines)
View File

@@ -0,0 +1,29 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// For the given format: `x-greptime-hints: auto_create_table=true, ttl=7d`
pub const HINTS_KEY: &str = "x-greptime-hints";
pub const HINTS_KEY_PREFIX: &str = "x-greptime-hint-";
pub const READ_PREFERENCE_HINT: &str = "read_preference";
pub const HINT_KEYS: [&str; 7] = [
"x-greptime-hint-auto_create_table",
"x-greptime-hint-ttl",
"x-greptime-hint-append_mode",
"x-greptime-hint-merge_mode",
"x-greptime-hint-physical_table",
"x-greptime-hint-skip_wal",
"x-greptime-hint-read_preference",
];

View File

@@ -13,6 +13,7 @@
// limitations under the License.
pub mod context;
pub mod hints;
pub mod session_config;
pub mod table_name;

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{Display, Formatter};
use std::mem::size_of;
use crate::logstore::provider::Provider;
@@ -30,6 +31,15 @@ pub enum Entry {
MultiplePart(MultiplePartEntry),
}
impl Display for Entry {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Entry::Naive(entry) => write!(f, "{}", entry),
Entry::MultiplePart(entry) => write!(f, "{}", entry),
}
}
}
impl Entry {
/// Into [NaiveEntry] if it's type of [Entry::Naive].
pub fn into_naive_entry(self) -> Option<NaiveEntry> {
@@ -56,6 +66,16 @@ pub struct NaiveEntry {
pub data: Vec<u8>,
}
impl Display for NaiveEntry {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"NaiveEntry(provider={:?}, region_id={}, entry_id={})",
self.provider, self.region_id, self.entry_id,
)
}
}
impl NaiveEntry {
/// Estimates the persisted size of the entry.
fn estimated_size(&self) -> usize {
@@ -79,6 +99,19 @@ pub struct MultiplePartEntry {
pub parts: Vec<Vec<u8>>,
}
impl Display for MultiplePartEntry {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"MultiplePartEntry(provider={:?}, region_id={}, entry_id={}, len={})",
self.provider,
self.region_id,
self.entry_id,
self.parts.len()
)
}
}
impl MultiplePartEntry {
fn is_complete(&self) -> bool {
self.headers.contains(&MultiplePartHeader::First)

View File

@@ -69,10 +69,10 @@ impl Display for Provider {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match &self {
Provider::RaftEngine(provider) => {
write!(f, "region: {}", RegionId::from_u64(provider.id))
write!(f, "RaftEngine(region={})", RegionId::from_u64(provider.id))
}
Provider::Kafka(provider) => write!(f, "topic: {}", provider.topic),
Provider::Noop => write!(f, "noop"),
Provider::Kafka(provider) => write!(f, "Kafka(topic={})", provider.topic),
Provider::Noop => write!(f, "Noop"),
}
}
}

View File

@@ -6,6 +6,14 @@ license.workspace = true
[features]
dashboard = []
enterprise = [
"cmd/enterprise",
"common-meta/enterprise",
"frontend/enterprise",
"meta-srv/enterprise",
"operator/enterprise",
"sql/enterprise",
]
[lints]
workspace = true

View File

@@ -226,6 +226,8 @@ impl GreptimeDbStandaloneBuilder {
},
procedure_manager.clone(),
register_procedure_loaders,
#[cfg(feature = "enterprise")]
None,
)
.unwrap(),
);

View File

@@ -457,6 +457,7 @@ pub async fn setup_test_http_app_with_frontend_and_user_provider(
))
.with_log_ingest_handler(instance.fe_instance().clone(), None, None)
.with_logs_handler(instance.fe_instance().clone())
.with_influxdb_handler(instance.fe_instance().clone())
.with_otlp_handler(instance.fe_instance().clone())
.with_jaeger_handler(instance.fe_instance().clone())
.with_greptime_config_options(instance.opts.to_toml().unwrap());

View File

@@ -104,6 +104,7 @@ macro_rules! http_tests {
test_identity_pipeline_with_custom_ts,
test_pipeline_dispatcher,
test_pipeline_suffix_template,
test_pipeline_context,
test_otlp_metrics,
test_otlp_traces_v0,
@@ -116,6 +117,8 @@ macro_rules! http_tests {
test_log_query,
test_jaeger_query_api,
test_jaeger_query_api_for_trace_v1,
test_influxdb_write,
);
)*
};
@@ -1155,6 +1158,9 @@ record_type = "system_table"
threshold = "30s"
sample_ratio = 1.0
ttl = "30d"
[query]
parallelism = 0
"#,
)
.trim()
@@ -2008,6 +2014,125 @@ table_suffix: _${type}
guard.remove_all().await;
}
pub async fn test_pipeline_context(storage_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_http_app_with_frontend(storage_type, "test_pipeline_context").await;
// handshake
let client = TestClient::new(app).await;
let root_pipeline = r#"
processors:
- date:
field: time
formats:
- "%Y-%m-%d %H:%M:%S%.3f"
ignore_missing: true
transform:
- fields:
- id1, id1_root
- id2, id2_root
type: int32
- fields:
- type
- log
- logger
type: string
- field: time
type: time
index: timestamp
table_suffix: _${type}
"#;
// 1. create pipeline
let res = client
.post("/v1/events/pipelines/root")
.header("Content-Type", "application/x-yaml")
.body(root_pipeline)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
// 2. write data
let data_body = r#"
[
{
"id1": "2436",
"id2": "2528",
"logger": "INTERACT.MANAGER",
"type": "http",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n",
"greptime_ttl": "1d"
},
{
"id1": "2436",
"id2": "2528",
"logger": "INTERACT.MANAGER",
"type": "db",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n"
}
]
"#;
let res = client
.post("/v1/events/logs?db=public&table=d_table&pipeline_name=root")
.header("Content-Type", "application/json")
.body(data_body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
// 3. check table list
validate_data(
"test_pipeline_context_table_list",
&client,
"show tables",
"[[\"d_table_db\"],[\"d_table_http\"],[\"demo\"],[\"numbers\"]]",
)
.await;
// 4. check each table's data
// CREATE TABLE IF NOT EXISTS "d_table_db" (
// ... ignore
// )
// ENGINE=mito
// WITH(
// append_mode = 'true'
// )
let expected = "[[\"d_table_db\",\"CREATE TABLE IF NOT EXISTS \\\"d_table_db\\\" (\\n \\\"id1_root\\\" INT NULL,\\n \\\"id2_root\\\" INT NULL,\\n \\\"type\\\" STRING NULL,\\n \\\"log\\\" STRING NULL,\\n \\\"logger\\\" STRING NULL,\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]";
validate_data(
"test_pipeline_context_db",
&client,
"show create table d_table_db",
expected,
)
.await;
// CREATE TABLE IF NOT EXISTS "d_table_http" (
// ... ignore
// )
// ENGINE=mito
// WITH(
// append_mode = 'true',
// ttl = '1day'
// )
let expected = "[[\"d_table_http\",\"CREATE TABLE IF NOT EXISTS \\\"d_table_http\\\" (\\n \\\"id1_root\\\" INT NULL,\\n \\\"id2_root\\\" INT NULL,\\n \\\"type\\\" STRING NULL,\\n \\\"log\\\" STRING NULL,\\n \\\"logger\\\" STRING NULL,\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true',\\n ttl = '1day'\\n)\"]]";
validate_data(
"test_pipeline_context_http",
&client,
"show create table d_table_http",
expected,
)
.await;
guard.remove_all().await;
}
pub async fn test_identity_pipeline_with_flatten(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
@@ -4472,6 +4597,52 @@ pub async fn test_jaeger_query_api_for_trace_v1(store_type: StorageType) {
guard.remove_all().await;
}
pub async fn test_influxdb_write(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_http_app_with_frontend(store_type, "test_influxdb_write").await;
let client = TestClient::new(app).await;
// Only write field cpu.
let result = client
.post("/v1/influxdb/write?db=public&p=greptime&u=greptime")
.body("test_alter,host=host1 cpu=1.2 1664370459457010101")
.send()
.await;
assert_eq!(result.status(), 204);
assert!(result.text().await.is_empty());
// Only write field mem.
let result = client
.post("/v1/influxdb/write?db=public&p=greptime&u=greptime")
.body("test_alter,host=host1 mem=10240.0 1664370469457010101")
.send()
.await;
assert_eq!(result.status(), 204);
assert!(result.text().await.is_empty());
// Write field cpu & mem.
let result = client
.post("/v1/influxdb/write?db=public&p=greptime&u=greptime")
.body("test_alter,host=host1 cpu=3.2,mem=20480.0 1664370479457010101")
.send()
.await;
assert_eq!(result.status(), 204);
assert!(result.text().await.is_empty());
let expected = r#"[["host1",1.2,1664370459457010101,null],["host1",null,1664370469457010101,10240.0],["host1",3.2,1664370479457010101,20480.0]]"#;
validate_data(
"test_influxdb_write",
&client,
"select * from test_alter order by ts;",
expected,
)
.await;
guard.remove_all().await;
}
async fn validate_data(test_name: &str, client: &TestClient, sql: &str, expected: &str) {
let res = client
.get(format!("/v1/sql?sql={sql}").as_str())

View File

@@ -339,6 +339,8 @@ pub async fn test_mysql_timezone(store_type: StorageType) {
let timezone = conn.fetch_all("SELECT @@time_zone").await.unwrap();
assert_eq!(timezone[0].get::<String, usize>(0), "Asia/Shanghai");
let timezone = conn.fetch_all("SELECT @@session.time_zone").await.unwrap();
assert_eq!(timezone[0].get::<String, usize>(0), "Asia/Shanghai");
let timezone = conn.fetch_all("SELECT @@system_time_zone").await.unwrap();
assert_eq!(timezone[0].get::<String, usize>(0), "UTC");
let _ = conn.execute("SET time_zone = 'UTC'").await.unwrap();
@@ -367,6 +369,8 @@ pub async fn test_mysql_timezone(store_type: StorageType) {
    let _ = conn.execute("SET time_zone = '+08:00'").await.unwrap();
    let timezone = conn.fetch_all("SELECT @@time_zone").await.unwrap();
    assert_eq!(timezone[0].get::<String, usize>(0), "+08:00");
    let timezone = conn.fetch_all("SELECT @@session.time_zone").await.unwrap();
    assert_eq!(timezone[0].get::<String, usize>(0), "+08:00");

    let rows2 = conn.fetch_all("select ts from demo").await.unwrap();
    // We use Utc here only for formatting.
@@ -391,6 +395,8 @@ pub async fn test_mysql_timezone(store_type: StorageType) {
    );
    let timezone = conn.fetch_all("SELECT @@time_zone").await.unwrap();
    assert_eq!(timezone[0].get::<String, usize>(0), "-07:00");
    let timezone = conn.fetch_all("SELECT @@session.time_zone").await.unwrap();
    assert_eq!(timezone[0].get::<String, usize>(0), "-07:00");

    let _ = fe_mysql_server.shutdown().await;
    guard.remove_all().await;


@@ -0,0 +1,133 @@
CREATE TABLE base (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

Affected Rows: 0

CREATE TABLE sink (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

Affected Rows: 0

CREATE FLOW filter_out
SINK TO sink
AS
SELECT desc_str, ts FROM base
WHERE desc_str IN ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');

Affected Rows: 0

SELECT options FROM INFORMATION_SCHEMA.FLOWS WHERE flow_name = 'filter_out';

+---------------------------+
| options                   |
+---------------------------+
| {"flow_type":"streaming"} |
+---------------------------+

INSERT INTO base VALUES
    ('a', '2023-01-01 00:00:00'),
    ('j', '2023-01-01 00:00:09'),
    ('l', '2023-01-01 00:00:08');

Affected Rows: 3

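-- The SQLNESS REPLACE directive below rewrites the non-deterministic row count
-- reported by ADMIN FLUSH_FLOW into the fixed token FLOW_FLUSHED, so the captured
-- result stays stable across test runs.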
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('filter_out');

+--------------------------------+
| ADMIN FLUSH_FLOW('filter_out') |
+--------------------------------+
| FLOW_FLUSHED                   |
+--------------------------------+

SELECT * FROM sink ORDER BY ts;

+----------+---------------------+
| desc_str | ts                  |
+----------+---------------------+
| a        | 2023-01-01T00:00:00 |
| j        | 2023-01-01T00:00:09 |
+----------+---------------------+

DROP FLOW filter_out;

Affected Rows: 0

DROP TABLE base;

Affected Rows: 0

DROP TABLE sink;

Affected Rows: 0

CREATE TABLE base (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

Affected Rows: 0

CREATE TABLE sink (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

Affected Rows: 0

CREATE FLOW filter_out
SINK TO sink
AS
SELECT desc_str, ts FROM base
WHERE desc_str NOT IN ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');

Affected Rows: 0

SELECT options FROM INFORMATION_SCHEMA.FLOWS WHERE flow_name = 'filter_out';

+---------------------------+
| options                   |
+---------------------------+
| {"flow_type":"streaming"} |
+---------------------------+

INSERT INTO base VALUES
    ('a', '2023-01-01 00:00:00'),
    ('j', '2023-01-01 00:00:09'),
    ('l', '2023-01-01 00:00:08');

Affected Rows: 3

-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('filter_out');

+--------------------------------+
| ADMIN FLUSH_FLOW('filter_out') |
+--------------------------------+
| FLOW_FLUSHED                   |
+--------------------------------+

SELECT * FROM sink ORDER BY ts;

+----------+---------------------+
| desc_str | ts                  |
+----------+---------------------+
| l        | 2023-01-01T00:00:08 |
+----------+---------------------+

DROP FLOW filter_out;

Affected Rows: 0

DROP TABLE base;

Affected Rows: 0

DROP TABLE sink;

Affected Rows: 0


@@ -0,0 +1,64 @@
CREATE TABLE base (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

CREATE TABLE sink (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

CREATE FLOW filter_out
SINK TO sink
AS
SELECT desc_str, ts FROM base
WHERE desc_str IN ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');

SELECT options FROM INFORMATION_SCHEMA.FLOWS WHERE flow_name = 'filter_out';

INSERT INTO base VALUES
    ('a', '2023-01-01 00:00:00'),
    ('j', '2023-01-01 00:00:09'),
    ('l', '2023-01-01 00:00:08');

-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('filter_out');

SELECT * FROM sink ORDER BY ts;

DROP FLOW filter_out;
DROP TABLE base;
DROP TABLE sink;

CREATE TABLE base (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

CREATE TABLE sink (
    desc_str STRING,
    ts TIMESTAMP TIME INDEX
);

CREATE FLOW filter_out
SINK TO sink
AS
SELECT desc_str, ts FROM base
WHERE desc_str NOT IN ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');

SELECT options FROM INFORMATION_SCHEMA.FLOWS WHERE flow_name = 'filter_out';

INSERT INTO base VALUES
    ('a', '2023-01-01 00:00:00'),
    ('j', '2023-01-01 00:00:09'),
    ('l', '2023-01-01 00:00:08');

-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('filter_out');

SELECT * FROM sink ORDER BY ts;

DROP FLOW filter_out;
DROP TABLE base;
DROP TABLE sink;