mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-13 00:32:56 +00:00
Compare commits
26 Commits
fix/correc
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a56a00224f | ||
|
|
6487f14f70 | ||
|
|
45b4067721 | ||
|
|
0c373062c2 | ||
|
|
567d3e66e9 | ||
|
|
63284a5081 | ||
|
|
4891d7ceef | ||
|
|
aadfcd7821 | ||
|
|
f3e2d333e4 | ||
|
|
06f9a4c80c | ||
|
|
8e2c2e6e9a | ||
|
|
28255d8ade | ||
|
|
90deaae844 | ||
|
|
a32326c887 | ||
|
|
fce1687fa7 | ||
|
|
ef6dd5b99f | ||
|
|
ac6d68aa2d | ||
|
|
d39895a970 | ||
|
|
59867cd5b6 | ||
|
|
9a4b7cbb32 | ||
|
|
2f242927a8 | ||
|
|
77310ec5bd | ||
|
|
ada4666e10 | ||
|
|
898e84898c | ||
|
|
6f86a22e6f | ||
|
|
5162c1de4d |
@@ -15,8 +15,11 @@ repos:
|
||||
rev: v1.0
|
||||
hooks:
|
||||
- id: fmt
|
||||
args: ["--", "--check"]
|
||||
stages: [commit-msg]
|
||||
- id: clippy
|
||||
args: ["--workspace", "--all-targets", "--all-features", "--", "-D", "warnings"]
|
||||
stages: [pre-push]
|
||||
stages: [commit-msg]
|
||||
- id: cargo-check
|
||||
args: ["--workspace", "--all-targets", "--all-features"]
|
||||
stages: [commit-msg]
|
||||
|
||||
18
Cargo.lock
generated
18
Cargo.lock
generated
@@ -5467,7 +5467,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=a2e5099d72a1cfa8ba41fa4296101eb5f874074a#a2e5099d72a1cfa8ba41fa4296101eb5f874074a"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=58aeee49267fb1eafa6f9123f9d0c47dd0f62722#58aeee49267fb1eafa6f9123f9d0c47dd0f62722"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"prost-types 0.13.5",
|
||||
@@ -9953,9 +9953,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql-parser"
|
||||
version = "0.6.0"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "328fe69c2443ec4f8e6c33ea925dde04a1026e6c95928e89ed02343944cac9bf"
|
||||
checksum = "6c3c2199b84e1253aade469e92ae16cd8dbe1de031c66a00f4f5cdd650290a86"
|
||||
dependencies = [
|
||||
"cfgrammar",
|
||||
"chrono",
|
||||
@@ -9965,7 +9965,6 @@ dependencies = [
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"unescaper",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -10326,7 +10325,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
"unescaper",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@@ -12514,6 +12512,7 @@ dependencies = [
|
||||
"servers",
|
||||
"snafu 0.8.6",
|
||||
"store-api",
|
||||
"table",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@@ -14169,15 +14168,6 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unescaper"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c01d12e3a56a4432a8b436f293c25f4808bdf9e9f9f98f9260bba1f1bc5a1f26"
|
||||
dependencies = [
|
||||
"thiserror 2.0.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.8.1"
|
||||
|
||||
@@ -151,7 +151,7 @@ etcd-client = { version = "0.16.1", features = [
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a2e5099d72a1cfa8ba41fa4296101eb5f874074a" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "58aeee49267fb1eafa6f9123f9d0c47dd0f62722" }
|
||||
hex = "0.4"
|
||||
http = "1"
|
||||
humantime = "2.1"
|
||||
@@ -189,7 +189,7 @@ paste = "1.0"
|
||||
pin-project = "1.0"
|
||||
pretty_assertions = "1.4.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
promql-parser = { version = "0.6", features = ["ser"] }
|
||||
promql-parser = { version = "0.7.1", features = ["ser"] }
|
||||
prost = { version = "0.13", features = ["no-recursion-limit"] }
|
||||
prost-types = "0.13"
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
|
||||
@@ -8863,7 +8863,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed of Reconciliation steps ",
|
||||
"description": "Elapsed of Reconciliation steps",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -9366,7 +9366,7 @@
|
||||
"editorMode": "code",
|
||||
"expr": "greptime_flow_input_buf_size",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -9472,6 +9472,755 @@
|
||||
],
|
||||
"title": "Flownode",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 187
|
||||
},
|
||||
"id": 357,
|
||||
"panels": [],
|
||||
"title": "Trigger",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "bf9fzta69bhtsa"
|
||||
},
|
||||
"description": "Total number of triggers currently defined.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 188
|
||||
},
|
||||
"id": 358,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "greptime_trigger_count{}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Trigger Count",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed time for trigger evaluation, including query execution and condition evaluation.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 196
|
||||
},
|
||||
"id": 359,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-p99",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-p75",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Trigger Eval Elapsed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Rate of failed trigger evaluations.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 196
|
||||
},
|
||||
"id": 360,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(greptime_trigger_evaluate_failure_count[$__rate_interval])",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Trigger Eval Failure Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed time to send trigger alerts to notification channels.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 204
|
||||
},
|
||||
"id": 361,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Send Alert Elapsed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Rate of failures when sending trigger alerts.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 204
|
||||
},
|
||||
"id": 364,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(greptime_trigger_send_alert_failure_count[$__rate_interval])",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Send Alert Failure Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed time to persist trigger alert records.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 212
|
||||
},
|
||||
"id": 363,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Save Alert Elapsed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Rate of failures when persisting trigger alert records.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 212
|
||||
},
|
||||
"id": 362,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Save Alert Failure Rate",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
@@ -9613,4 +10362,4 @@
|
||||
"title": "GreptimeDB",
|
||||
"uid": "dejf3k5e7g2kgb",
|
||||
"version": 15
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,12 +111,34 @@
|
||||
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
|
||||
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
|
||||
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
|
||||
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
|
||||
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
|
||||
# Flownode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
|
||||
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
|
||||
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
|
||||
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}]` |
|
||||
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
|
||||
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
|
||||
# Trigger
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Trigger Count | `greptime_trigger_count{}` | `timeseries` | Total number of triggers currently defined. | `prometheus` | -- | `__auto` |
|
||||
| Trigger Eval Elapsed | `histogram_quantile(0.99,
|
||||
rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])
|
||||
)`<br/>`histogram_quantile(0.75,
|
||||
rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])
|
||||
)` | `timeseries` | Elapsed time for trigger evaluation, including query execution and condition evaluation. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
|
||||
| Trigger Eval Failure Rate | `rate(greptime_trigger_evaluate_failure_count[$__rate_interval])` | `timeseries` | Rate of failed trigger evaluations. | `prometheus` | `none` | `__auto` |
|
||||
| Send Alert Elapsed | `histogram_quantile(0.99,
|
||||
rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])
|
||||
)`<br/>`histogram_quantile(0.75,
|
||||
rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])
|
||||
)` | `timeseries` | Elapsed time to send trigger alerts to notification channels. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99` |
|
||||
| Send Alert Failure Rate | `rate(greptime_trigger_send_alert_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when sending trigger alerts. | `prometheus` | `none` | `__auto` |
|
||||
| Save Alert Elapsed | `histogram_quantile(0.99,
|
||||
rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])
|
||||
)`<br/>`histogram_quantile(0.75,
|
||||
rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])
|
||||
)` | `timeseries` | Elapsed time to persist trigger alert records. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99` |
|
||||
| Save Alert Failure Rate | `rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when persisting trigger alert records. | `prometheus` | `none` | `__auto` |
|
||||
|
||||
@@ -1002,7 +1002,7 @@ groups:
|
||||
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
|
||||
- title: Reconciliation steps
|
||||
type: timeseries
|
||||
description: 'Elapsed of Reconciliation steps '
|
||||
description: Elapsed of Reconciliation steps
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
|
||||
@@ -1057,7 +1057,7 @@ groups:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Flow Processing Error per Instance
|
||||
type: timeseries
|
||||
description: Flow Processing Error per Instance.
|
||||
@@ -1067,3 +1067,89 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
|
||||
- title: Trigger
|
||||
panels:
|
||||
- title: Trigger Count
|
||||
type: timeseries
|
||||
description: Total number of triggers currently defined.
|
||||
queries:
|
||||
- expr: greptime_trigger_count{}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Trigger Eval Elapsed
|
||||
type: timeseries
|
||||
description: Elapsed time for trigger evaluation, including query execution and condition evaluation.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- expr: "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p75'
|
||||
- title: Trigger Eval Failure Rate
|
||||
type: timeseries
|
||||
description: Rate of failed trigger evaluations.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: rate(greptime_trigger_evaluate_failure_count[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Send Alert Elapsed
|
||||
type: timeseries
|
||||
description: Elapsed time to send trigger alerts to notification channels.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99'
|
||||
- expr: "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75'
|
||||
- title: Send Alert Failure Rate
|
||||
type: timeseries
|
||||
description: Rate of failures when sending trigger alerts.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: rate(greptime_trigger_send_alert_failure_count[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Save Alert Elapsed
|
||||
type: timeseries
|
||||
description: Elapsed time to persist trigger alert records.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99'
|
||||
- expr: "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75'
|
||||
- title: Save Alert Failure Rate
|
||||
type: timeseries
|
||||
description: Rate of failures when persisting trigger alert records.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
|
||||
@@ -8863,7 +8863,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed of Reconciliation steps ",
|
||||
"description": "Elapsed of Reconciliation steps",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -9366,7 +9366,7 @@
|
||||
"editorMode": "code",
|
||||
"expr": "greptime_flow_input_buf_size",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -9472,6 +9472,755 @@
|
||||
],
|
||||
"title": "Flownode",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 187
|
||||
},
|
||||
"id": 357,
|
||||
"panels": [],
|
||||
"title": "Trigger",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Total number of triggers currently defined.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 188
|
||||
},
|
||||
"id": 358,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "greptime_trigger_count{}",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Trigger Count",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed time for trigger evaluation, including query execution and condition evaluation.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 196
|
||||
},
|
||||
"id": 359,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-p99",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-p75",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Trigger Eval Elapsed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Rate of failed trigger evaluations.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 196
|
||||
},
|
||||
"id": 360,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(greptime_trigger_evaluate_failure_count[$__rate_interval])",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Trigger Eval Failure Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed time to send trigger alerts to notification channels.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 204
|
||||
},
|
||||
"id": 361,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Send Alert Elapsed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Rate of failures when sending trigger alerts.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 204
|
||||
},
|
||||
"id": 364,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(greptime_trigger_send_alert_failure_count[$__rate_interval])",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Send Alert Failure Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Elapsed time to persist trigger alert records.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 212
|
||||
},
|
||||
"id": 363,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Save Alert Elapsed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"description": "Rate of failures when persisting trigger alert records.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 212
|
||||
},
|
||||
"id": 362,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.6.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Save Alert Failure Rate",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"preload": false,
|
||||
@@ -9613,4 +10362,4 @@
|
||||
"title": "GreptimeDB",
|
||||
"uid": "dejf3k5e7g2kgb",
|
||||
"version": 15
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,12 +111,34 @@
|
||||
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
|
||||
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
|
||||
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
|
||||
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
|
||||
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
|
||||
# Flownode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Flow Ingest / Output Rate | `sum by(instance, pod, direction) (rate(greptime_flow_processed_rows[$__rate_interval]))` | `timeseries` | Flow Ingest / Output Rate. | `prometheus` | -- | `[{{pod}}]-[{{instance}}]-[{{direction}}]` |
|
||||
| Flow Ingest Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_insert_elapsed_bucket[$__rate_interval])) by (le, instance, pod))` | `timeseries` | Flow Ingest Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-p95` |
|
||||
| Flow Operation Latency | `histogram_quantile(0.95, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))`<br/>`histogram_quantile(0.99, sum(rate(greptime_flow_processing_time_bucket[$__rate_interval])) by (le,instance,pod,type))` | `timeseries` | Flow Operation Latency. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]-p95` |
|
||||
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}]` |
|
||||
| Flow Buffer Size per Instance | `greptime_flow_input_buf_size` | `timeseries` | Flow Buffer Size per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]` |
|
||||
| Flow Processing Error per Instance | `sum by(instance,pod,code) (rate(greptime_flow_errors[$__rate_interval]))` | `timeseries` | Flow Processing Error per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{code}}]` |
|
||||
# Trigger
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Trigger Count | `greptime_trigger_count{}` | `timeseries` | Total number of triggers currently defined. | `prometheus` | -- | `__auto` |
|
||||
| Trigger Eval Elapsed | `histogram_quantile(0.99, rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval]))`<br/>`histogram_quantile(0.75, rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval]))` | `timeseries` | Elapsed time for trigger evaluation, including query execution and condition evaluation. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-p99` |
|
||||
| Trigger Eval Failure Rate | `rate(greptime_trigger_evaluate_failure_count[$__rate_interval])` | `timeseries` | Rate of failed trigger evaluations. | `prometheus` | `none` | `__auto` |
|
||||
| Send Alert Elapsed | `histogram_quantile(0.99, rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval]))`<br/>`histogram_quantile(0.75, rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval]))` | `timeseries` | Elapsed time to send trigger alerts to notification channels. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99` |
|
||||
| Send Alert Failure Rate | `rate(greptime_trigger_send_alert_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when sending trigger alerts. | `prometheus` | `none` | `__auto` |
|
||||
| Save Alert Elapsed | `histogram_quantile(0.99, rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval]))`<br/>`histogram_quantile(0.75, rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval]))` | `timeseries` | Elapsed time to persist trigger alert records. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99` |
|
||||
| Save Alert Failure Rate | `rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])` | `timeseries` | Rate of failures when persisting trigger alert records. | `prometheus` | `none` | `__auto` |
|
||||
|
||||
@@ -1002,7 +1002,7 @@ groups:
|
||||
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
|
||||
- title: Reconciliation steps
|
||||
type: timeseries
|
||||
description: 'Elapsed of Reconciliation steps '
|
||||
description: Elapsed of Reconciliation steps
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
|
||||
@@ -1057,7 +1057,7 @@ groups:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]'
|
||||
- title: Flow Processing Error per Instance
|
||||
type: timeseries
|
||||
description: Flow Processing Error per Instance.
|
||||
@@ -1067,3 +1067,89 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{code}}]'
|
||||
- title: Trigger
|
||||
panels:
|
||||
- title: Trigger Count
|
||||
type: timeseries
|
||||
description: Total number of triggers currently defined.
|
||||
queries:
|
||||
- expr: greptime_trigger_count{}
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Trigger Eval Elapsed
|
||||
type: timeseries
|
||||
description: Elapsed time for trigger evaluation, including query execution and condition evaluation.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: "histogram_quantile(0.99, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p99'
|
||||
- expr: "histogram_quantile(0.75, \n rate(greptime_trigger_evaluate_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-p75'
|
||||
- title: Trigger Eval Failure Rate
|
||||
type: timeseries
|
||||
description: Rate of failed trigger evaluations.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: rate(greptime_trigger_evaluate_failure_count[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Send Alert Elapsed
|
||||
type: timeseries
|
||||
description: Elapsed time to send trigger alerts to notification channels.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: "histogram_quantile(0.99, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p99'
|
||||
- expr: "histogram_quantile(0.75, \n rate(greptime_trigger_send_alert_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{channel_type}}]-p75'
|
||||
- title: Send Alert Failure Rate
|
||||
type: timeseries
|
||||
description: Rate of failures when sending trigger alerts.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: rate(greptime_trigger_send_alert_failure_count[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
- title: Save Alert Elapsed
|
||||
type: timeseries
|
||||
description: Elapsed time to persist trigger alert records.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: "histogram_quantile(0.99, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p99'
|
||||
- expr: "histogram_quantile(0.75, \n rate(greptime_trigger_save_alert_record_elapsed_bucket[$__rate_interval])\n)"
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{storage_type}}]-p75'
|
||||
- title: Save Alert Failure Rate
|
||||
type: timeseries
|
||||
description: Rate of failures when persisting trigger alert records.
|
||||
unit: none
|
||||
queries:
|
||||
- expr: rate(greptime_trigger_save_alert_record_failure_count[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: __auto
|
||||
|
||||
@@ -32,6 +32,7 @@ use crate::error::Result;
|
||||
pub mod error;
|
||||
pub mod information_extension;
|
||||
pub mod kvbackend;
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod memory;
|
||||
mod metrics;
|
||||
pub mod system_schema;
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) const METRIC_DB_LABEL: &str = "db";
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use prometheus::*;
|
||||
|
||||
@@ -25,7 +23,7 @@ lazy_static! {
|
||||
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!(
|
||||
"greptime_catalog_table_count",
|
||||
"catalog table count",
|
||||
&[METRIC_DB_LABEL]
|
||||
&["db"]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
|
||||
|
||||
@@ -24,6 +24,7 @@ use std::sync::Arc;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use common_telemetry::tracing::Span;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use futures_util::StreamExt;
|
||||
use snafu::ResultExt;
|
||||
@@ -163,6 +164,7 @@ impl DataSource for SystemTableDataSource {
|
||||
stream: Box::pin(stream),
|
||||
output_ordering: None,
|
||||
metrics: Default::default(),
|
||||
span: Span::current(),
|
||||
};
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use core::pin::pin;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
@@ -31,15 +32,17 @@ use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
StringVectorBuilder, TimestampSecondVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use futures::StreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, ScanRequest, TableId};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::{TableInfo, TableType};
|
||||
|
||||
use crate::CatalogManager;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
|
||||
UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates, TABLES};
|
||||
use crate::system_schema::utils;
|
||||
|
||||
@@ -247,6 +250,10 @@ impl InformationSchemaTablesBuilder {
|
||||
.catalog_manager
|
||||
.upgrade()
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let partition_manager = catalog_manager
|
||||
.as_any()
|
||||
.downcast_ref::<KvBackendCatalogManager>()
|
||||
.map(|catalog_manager| catalog_manager.partition_manager());
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
let information_extension = utils::information_extension(&self.catalog_manager)?;
|
||||
@@ -267,37 +274,59 @@ impl InformationSchemaTablesBuilder {
|
||||
};
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
let table_stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
const BATCH_SIZE: usize = 128;
|
||||
// Split tables into chunks
|
||||
let mut table_chunks = pin!(table_stream.ready_chunks(BATCH_SIZE));
|
||||
|
||||
// TODO(dennis): make it working for metric engine
|
||||
let table_region_stats =
|
||||
if table_info.meta.engine == MITO_ENGINE || table_info.is_physical_table() {
|
||||
table_info
|
||||
.meta
|
||||
.region_numbers
|
||||
.iter()
|
||||
.map(|n| RegionId::new(table_info.ident.table_id, *n))
|
||||
.flat_map(|region_id| {
|
||||
region_stats
|
||||
.binary_search_by_key(&region_id, |x| x.id)
|
||||
.map(|i| &region_stats[i])
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
while let Some(tables) = table_chunks.next().await {
|
||||
let tables = tables.into_iter().collect::<Result<Vec<_>>>()?;
|
||||
let mito_or_physical_table_ids = tables
|
||||
.iter()
|
||||
.filter(|table| {
|
||||
table.table_info().meta.engine == MITO_ENGINE
|
||||
|| table.table_info().is_physical_table()
|
||||
})
|
||||
.map(|table| table.table_info().ident.table_id)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.add_table(
|
||||
&predicates,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
table_info,
|
||||
table.table_type(),
|
||||
&table_region_stats,
|
||||
);
|
||||
let table_routes = if let Some(partition_manager) = &partition_manager {
|
||||
partition_manager
|
||||
.batch_find_region_routes(&mito_or_physical_table_ids)
|
||||
.await
|
||||
.context(FindRegionRoutesSnafu)?
|
||||
} else {
|
||||
mito_or_physical_table_ids
|
||||
.into_iter()
|
||||
.map(|id| (id, vec![]))
|
||||
.collect()
|
||||
};
|
||||
|
||||
for table in tables {
|
||||
let table_region_stats =
|
||||
match table_routes.get(&table.table_info().ident.table_id) {
|
||||
Some(routes) => routes
|
||||
.iter()
|
||||
.flat_map(|route| {
|
||||
let region_id = route.region.id;
|
||||
region_stats
|
||||
.binary_search_by_key(®ion_id, |x| x.id)
|
||||
.map(|i| ®ion_stats[i])
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
None => vec![],
|
||||
};
|
||||
|
||||
self.add_table(
|
||||
&predicates,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
table.table_info(),
|
||||
table.table_type(),
|
||||
&table_region_stats,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -337,7 +337,7 @@ mod tests {
|
||||
.build();
|
||||
|
||||
let table_metadata_manager = TableMetadataManager::new(backend);
|
||||
let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);
|
||||
let mut view_info = common_meta::key::test_utils::new_test_table_info(1024);
|
||||
view_info.table_type = TableType::View;
|
||||
let logical_plan = vec![1, 2, 3];
|
||||
// Create view metadata
|
||||
|
||||
@@ -162,7 +162,6 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
|
||||
next_column_id: columns as u32 + 1,
|
||||
value_indices: vec![],
|
||||
options: Default::default(),
|
||||
region_numbers: (1..=100).collect(),
|
||||
partition_key_indices: vec![],
|
||||
column_ids: vec![],
|
||||
};
|
||||
|
||||
@@ -68,8 +68,8 @@ pub enum Error {
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start wal options allocator"))]
|
||||
StartWalOptionsAllocator {
|
||||
#[snafu(display("Failed to start wal provider"))]
|
||||
StartWalProvider {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
@@ -343,7 +343,7 @@ impl ErrorExt for Error {
|
||||
|
||||
Error::StartProcedureManager { source, .. }
|
||||
| Error::StopProcedureManager { source, .. } => source.status_code(),
|
||||
Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
|
||||
Error::StartWalProvider { source, .. } => source.status_code(),
|
||||
Error::HttpQuerySql { .. } => StatusCode::Internal,
|
||||
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
|
||||
source.status_code()
|
||||
|
||||
@@ -37,6 +37,7 @@ use common_grpc::flight::{FlightDecoder, FlightMessage};
|
||||
use common_query::Output;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStreamWrapper};
|
||||
use common_telemetry::tracing::Span;
|
||||
use common_telemetry::tracing_context::W3cTrace;
|
||||
use common_telemetry::{error, warn};
|
||||
use futures::future;
|
||||
@@ -456,6 +457,7 @@ impl Database {
|
||||
stream,
|
||||
output_ordering: None,
|
||||
metrics: Default::default(),
|
||||
span: Span::current(),
|
||||
};
|
||||
Ok(Output::new_with_stream(Box::pin(record_batch_stream)))
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ use common_query::request::QueryRequest;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use common_telemetry::error;
|
||||
use common_telemetry::tracing::Span;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use prost::Message;
|
||||
use query::query_engine::DefaultSerializer;
|
||||
@@ -242,6 +243,7 @@ impl RegionRequester {
|
||||
stream,
|
||||
output_ordering: None,
|
||||
metrics,
|
||||
span: Span::current(),
|
||||
};
|
||||
Ok(Box::pin(record_batch_stream))
|
||||
}
|
||||
|
||||
@@ -330,7 +330,6 @@ mod tests {
|
||||
use common_config::ENV_VAR_SEP;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use object_store::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
|
||||
use super::*;
|
||||
use crate::options::GlobalOptions;
|
||||
@@ -374,9 +373,6 @@ mod tests {
|
||||
hostname = "127.0.0.1"
|
||||
runtime_size = 8
|
||||
|
||||
[heartbeat]
|
||||
interval = "300ms"
|
||||
|
||||
[meta_client]
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
timeout = "3s"
|
||||
@@ -434,13 +430,6 @@ mod tests {
|
||||
);
|
||||
assert!(!raft_engine_config.sync_write);
|
||||
|
||||
let HeartbeatOptions {
|
||||
interval: heart_beat_interval,
|
||||
..
|
||||
} = options.heartbeat;
|
||||
|
||||
assert_eq!(300, heart_beat_interval.as_millis());
|
||||
|
||||
let MetaClientOptions {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout,
|
||||
|
||||
@@ -64,8 +64,8 @@ pub enum Error {
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start wal options allocator"))]
|
||||
StartWalOptionsAllocator {
|
||||
#[snafu(display("Failed to start wal provider"))]
|
||||
StartWalProvider {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
@@ -289,8 +289,8 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build wal options allocator"))]
|
||||
BuildWalOptionsAllocator {
|
||||
#[snafu(display("Failed to build wal provider"))]
|
||||
BuildWalProvider {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
@@ -350,8 +350,9 @@ impl ErrorExt for Error {
|
||||
|
||||
Error::StartProcedureManager { source, .. }
|
||||
| Error::StopProcedureManager { source, .. } => source.status_code(),
|
||||
Error::BuildWalOptionsAllocator { source, .. }
|
||||
| Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
|
||||
Error::BuildWalProvider { source, .. } | Error::StartWalProvider { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::HttpQuerySql { .. } => StatusCode::Internal,
|
||||
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
|
||||
source.status_code()
|
||||
|
||||
@@ -358,7 +358,6 @@ impl StartCommand {
|
||||
let heartbeat_task = flow::heartbeat::HeartbeatTask::new(
|
||||
&opts,
|
||||
meta_client.clone(),
|
||||
opts.heartbeat.clone(),
|
||||
Arc::new(executor),
|
||||
Arc::new(resource_stat),
|
||||
);
|
||||
|
||||
@@ -20,6 +20,7 @@ use std::time::Duration;
|
||||
use async_trait::async_trait;
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use catalog::information_extension::DistributedInformationExtension;
|
||||
use catalog::information_schema::InformationExtensionRef;
|
||||
use catalog::kvbackend::{
|
||||
CachedKvBackendBuilder, CatalogManagerConfiguratorRef, KvBackendCatalogManagerBuilder,
|
||||
MetaKvBackend,
|
||||
@@ -412,6 +413,7 @@ impl StartCommand {
|
||||
meta_client.clone(),
|
||||
client.clone(),
|
||||
));
|
||||
plugins.insert::<InformationExtensionRef>(information_extension.clone());
|
||||
|
||||
let process_manager = Arc::new(ProcessManager::new(
|
||||
addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
|
||||
@@ -108,7 +108,7 @@ pub trait App: Send {
|
||||
}
|
||||
}
|
||||
|
||||
/// Log the versions of the application, and the arguments passed to the cli.
|
||||
/// Log the versions of the application.
|
||||
///
|
||||
/// `version` should be the same as the output of cli "--version";
|
||||
/// and the `short_version` is the short version of the codes, often consist of git branch and commit.
|
||||
@@ -118,10 +118,7 @@ pub fn log_versions(version: &str, short_version: &str, app: &str) {
|
||||
.with_label_values(&[common_version::version(), short_version, app])
|
||||
.inc();
|
||||
|
||||
// Log version and argument flags.
|
||||
info!("GreptimeDB version: {}", version);
|
||||
|
||||
log_env_flags();
|
||||
}
|
||||
|
||||
pub fn create_resource_limit_metrics(app: &str) {
|
||||
@@ -144,13 +141,6 @@ pub fn create_resource_limit_metrics(app: &str) {
|
||||
}
|
||||
}
|
||||
|
||||
fn log_env_flags() {
|
||||
info!("command line arguments");
|
||||
for argument in std::env::args() {
|
||||
info!("argument: {}", argument);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn maybe_activate_heap_profile(memory_options: &common_options::memory::MemoryOptions) {
|
||||
if memory_options.enable_heap_profiling {
|
||||
match activate_heap_profile() {
|
||||
|
||||
@@ -40,7 +40,7 @@ use common_meta::procedure_executor::LocalProcedureExecutor;
|
||||
use common_meta::region_keeper::MemoryRegionKeeper;
|
||||
use common_meta::region_registry::LeaderRegionRegistry;
|
||||
use common_meta::sequence::SequenceBuilder;
|
||||
use common_meta::wal_options_allocator::{WalOptionsAllocatorRef, build_wal_options_allocator};
|
||||
use common_meta::wal_provider::{WalProviderRef, build_wal_provider};
|
||||
use common_procedure::ProcedureManagerRef;
|
||||
use common_query::prelude::set_default_prefix;
|
||||
use common_telemetry::info;
|
||||
@@ -64,8 +64,8 @@ use plugins::frontend::context::{
|
||||
use plugins::standalone::context::DdlManagerConfigureContext;
|
||||
use servers::tls::{TlsMode, TlsOption, merge_tls_option};
|
||||
use snafu::ResultExt;
|
||||
use standalone::StandaloneInformationExtension;
|
||||
use standalone::options::StandaloneOptions;
|
||||
use standalone::{StandaloneInformationExtension, StandaloneRepartitionProcedureFactory};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{OtherSnafu, Result, StartFlownodeSnafu};
|
||||
@@ -120,7 +120,7 @@ pub struct Instance {
|
||||
frontend: Frontend,
|
||||
flownode: FlownodeInstance,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
wal_provider: WalProviderRef,
|
||||
// Keep the logging guard to prevent the worker from being dropped.
|
||||
_guard: Vec<WorkerGuard>,
|
||||
}
|
||||
@@ -146,10 +146,10 @@ impl App for Instance {
|
||||
.await
|
||||
.context(error::StartProcedureManagerSnafu)?;
|
||||
|
||||
self.wal_options_allocator
|
||||
self.wal_provider
|
||||
.start()
|
||||
.await
|
||||
.context(error::StartWalOptionsAllocatorSnafu)?;
|
||||
.context(error::StartWalProviderSnafu)?;
|
||||
|
||||
plugins::start_frontend_plugins(self.frontend.instance.plugins().clone())
|
||||
.await
|
||||
@@ -468,7 +468,7 @@ impl StartCommand {
|
||||
flow_server: flownode.flow_engine(),
|
||||
});
|
||||
|
||||
let table_id_sequence = Arc::new(
|
||||
let table_id_allocator = Arc::new(
|
||||
SequenceBuilder::new(TABLE_ID_SEQ, kv_backend.clone())
|
||||
.initial(MIN_USER_TABLE_ID as u64)
|
||||
.step(10)
|
||||
@@ -485,13 +485,13 @@ impl StartCommand {
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::InvalidWalProviderSnafu)?;
|
||||
let wal_options_allocator = build_wal_options_allocator(&kafka_options, kv_backend.clone())
|
||||
let wal_provider = build_wal_provider(&kafka_options, kv_backend.clone())
|
||||
.await
|
||||
.context(error::BuildWalOptionsAllocatorSnafu)?;
|
||||
let wal_options_allocator = Arc::new(wal_options_allocator);
|
||||
.context(error::BuildWalProviderSnafu)?;
|
||||
let wal_provider = Arc::new(wal_provider);
|
||||
let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
|
||||
table_id_sequence,
|
||||
wal_options_allocator.clone(),
|
||||
table_id_allocator,
|
||||
wal_provider.clone(),
|
||||
));
|
||||
let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
|
||||
flow_id_sequence,
|
||||
@@ -509,8 +509,13 @@ impl StartCommand {
|
||||
region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
|
||||
};
|
||||
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager.clone(), true)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
let ddl_manager = DdlManager::try_new(
|
||||
ddl_context,
|
||||
procedure_manager.clone(),
|
||||
Arc::new(StandaloneRepartitionProcedureFactory),
|
||||
true,
|
||||
)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
|
||||
let ddl_manager = if let Some(configurator) =
|
||||
plugins.get::<DdlManagerConfiguratorRef<DdlManagerConfigureContext>>()
|
||||
@@ -585,7 +590,7 @@ impl StartCommand {
|
||||
frontend,
|
||||
flownode,
|
||||
procedure_manager,
|
||||
wal_options_allocator,
|
||||
wal_provider,
|
||||
_guard: guard,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -228,7 +228,6 @@ fn test_load_flownode_example_config() {
|
||||
..Default::default()
|
||||
},
|
||||
tracing: Default::default(),
|
||||
heartbeat: Default::default(),
|
||||
// flownode deliberately use a slower query parallelism
|
||||
// to avoid overwhelming the frontend with too many queries
|
||||
query: QueryOptions {
|
||||
|
||||
@@ -34,7 +34,7 @@ use table::requests::{
|
||||
};
|
||||
|
||||
use crate::error::{
|
||||
ColumnNotFoundSnafu, InvalidColumnDefSnafu, InvalidIndexOptionSnafu,
|
||||
self, ColumnNotFoundSnafu, InvalidColumnDefSnafu, InvalidIndexOptionSnafu,
|
||||
InvalidSetFulltextOptionRequestSnafu, InvalidSetSkippingIndexOptionRequestSnafu,
|
||||
InvalidSetTableOptionRequestSnafu, InvalidUnsetTableOptionRequestSnafu,
|
||||
MissingAlterIndexOptionSnafu, MissingFieldSnafu, MissingTableMetaSnafu,
|
||||
@@ -251,6 +251,10 @@ pub fn alter_expr_to_request(
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
AlterKind::SetDefaults { defaults }
|
||||
}
|
||||
Kind::Repartition(_) => error::UnexpectedSnafu {
|
||||
err_msg: "Repartition operation should be handled through DdlManager and not converted to AlterTableRequest",
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
let request = AlterTableRequest {
|
||||
|
||||
@@ -161,6 +161,13 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unexpected: {err_msg}"))]
|
||||
Unexpected {
|
||||
err_msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -188,6 +195,7 @@ impl ErrorExt for Error {
|
||||
Error::ColumnNotFound { .. } => StatusCode::TableColumnNotFound,
|
||||
Error::SqlCommon { source, .. } => source.status_code(),
|
||||
Error::MissingTableMeta { .. } => StatusCode::Unexpected,
|
||||
Error::Unexpected { .. } => StatusCode::Unexpected,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ use crate::node_manager::NodeManagerRef;
|
||||
use crate::region_keeper::MemoryRegionKeeperRef;
|
||||
use crate::region_registry::LeaderRegionRegistryRef;
|
||||
|
||||
pub mod allocator;
|
||||
pub mod alter_database;
|
||||
pub mod alter_logical_tables;
|
||||
pub mod alter_table;
|
||||
@@ -36,8 +37,7 @@ pub mod create_database;
|
||||
pub mod create_flow;
|
||||
pub mod create_logical_tables;
|
||||
pub mod create_table;
|
||||
mod create_table_template;
|
||||
pub(crate) use create_table_template::{CreateRequestBuilder, build_template_from_raw_table_info};
|
||||
pub(crate) use create_table::{CreateRequestBuilder, build_template_from_raw_table_info};
|
||||
pub mod create_view;
|
||||
pub mod drop_database;
|
||||
pub mod drop_flow;
|
||||
|
||||
17
src/common/meta/src/ddl/allocator.rs
Normal file
17
src/common/meta/src/ddl/allocator.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod region_routes;
|
||||
pub mod resource_id;
|
||||
pub mod wal_options;
|
||||
80
src/common/meta/src/ddl/allocator/region_routes.rs
Normal file
80
src/common/meta/src/ddl/allocator/region_routes.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::peer::PeerAllocator;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
|
||||
pub type RegionRoutesAllocatorRef = Arc<dyn RegionRoutesAllocator>;
|
||||
|
||||
/// Produces the [`RegionRoute`]s of a table.
///
/// Implementations must be `Send + Sync`; the trait is used behind
/// `Arc<dyn RegionRoutesAllocator>`.
#[async_trait::async_trait]
|
||||
pub trait RegionRoutesAllocator: Send + Sync {
|
||||
/// Builds the routes for the regions of `table_id`.
///
/// Each entry of `regions_and_partitions` pairs a region number with the
/// partition expression text recorded on that region.
async fn allocate(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
regions_and_partitions: &[(RegionNumber, &str)],
|
||||
) -> Result<Vec<RegionRoute>>;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<T: PeerAllocator> RegionRoutesAllocator for T {
|
||||
async fn allocate(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
regions_and_partitions: &[(RegionNumber, &str)],
|
||||
) -> Result<Vec<RegionRoute>> {
|
||||
let regions = regions_and_partitions.len().max(1);
|
||||
let peers = self.alloc(regions).await?;
|
||||
debug!("Allocated peers {:?} for table {}", peers, table_id,);
|
||||
|
||||
let mut region_routes = regions_and_partitions
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, (region_number, partition))| {
|
||||
let region = Region {
|
||||
id: RegionId::new(table_id, *region_number),
|
||||
partition_expr: partition.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let peer = peers[i % peers.len()].clone();
|
||||
|
||||
RegionRoute {
|
||||
region,
|
||||
leader_peer: Some(peer),
|
||||
..Default::default()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// If the table has no partitions, we need to create a default region.
|
||||
if region_routes.is_empty() {
|
||||
region_routes.push(RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 0),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(peers[0].clone()),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
Ok(region_routes)
|
||||
}
|
||||
}
|
||||
35
src/common/meta/src/ddl/allocator/resource_id.rs
Normal file
35
src/common/meta/src/ddl/allocator/resource_id.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
pub type ResourceIdAllocatorRef = Arc<dyn ResourceIdAllocator>;
|
||||
|
||||
/// Asynchronous allocator that hands out `u64` resource ids from a sequence.
///
/// Implementations must be `Send + Sync`; the trait is used behind
/// `Arc<dyn ResourceIdAllocator>`.
#[async_trait::async_trait]
|
||||
pub trait ResourceIdAllocator: Send + Sync {
|
||||
/// Returns the next value and increments the sequence.
|
||||
async fn next(&self) -> Result<u64>;
|
||||
|
||||
/// Returns the current value stored in the remote storage without incrementing the sequence.
|
||||
async fn peek(&self) -> Result<u64>;
|
||||
|
||||
/// Jumps to the given value.
// NOTE(review): presumably `next` becomes the value returned by the following
// call to `next()` — confirm against the implementors.
|
||||
async fn jump_to(&self, next: u64) -> Result<()>;
|
||||
|
||||
/// Returns the range of available sequences.
///
/// Unlike the other methods this one is infallible: it returns the range
/// directly instead of a `Result`.
|
||||
async fn min_max(&self) -> Range<u64>;
|
||||
}
|
||||
31
src/common/meta/src/ddl/allocator/wal_options.rs
Normal file
31
src/common/meta/src/ddl/allocator/wal_options.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use store_api::storage::RegionNumber;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
pub type WalOptionsAllocatorRef = Arc<dyn WalOptionsAllocator>;
|
||||
|
||||
/// Allocates per-region WAL options.
///
/// Implementations must be `Send + Sync`; the trait is used behind
/// `Arc<dyn WalOptionsAllocator>`.
#[async_trait::async_trait]
|
||||
pub trait WalOptionsAllocator: Send + Sync {
|
||||
/// Returns a map keyed by region number for the given `region_numbers`.
///
/// NOTE(review): the `String` values are presumably the encoded WAL options of
/// each region, and `skip_wal` presumably requests options that disable the
/// WAL — confirm both against the implementors.
async fn allocate(
|
||||
&self,
|
||||
region_numbers: &[RegionNumber],
|
||||
skip_wal: bool,
|
||||
) -> Result<HashMap<RegionNumber, String>>;
|
||||
}
|
||||
@@ -22,7 +22,7 @@ use snafu::OptionExt;
|
||||
use table::metadata::RawTableInfo;
|
||||
|
||||
use crate::ddl::alter_table::AlterTableProcedure;
|
||||
use crate::error::{InvalidProtoMsgSnafu, Result};
|
||||
use crate::error::{self, InvalidProtoMsgSnafu, Result};
|
||||
|
||||
impl AlterTableProcedure {
|
||||
/// Makes alter kind proto that all regions can reuse.
|
||||
@@ -112,6 +112,10 @@ fn create_proto_alter_kind(
|
||||
Kind::UnsetIndexes(v) => Ok(Some(alter_request::Kind::UnsetIndexes(v.clone()))),
|
||||
Kind::DropDefaults(v) => Ok(Some(alter_request::Kind::DropDefaults(v.clone()))),
|
||||
Kind::SetDefaults(v) => Ok(Some(alter_request::Kind::SetDefaults(v.clone()))),
|
||||
Kind::Repartition(_) => error::UnexpectedSnafu {
|
||||
err_msg: "Repartition operation should be handled through DdlManager and not converted to AlterTableRequest",
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::metric_engine_consts::ALTER_PHYSICAL_EXTENSION_KEY;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use store_api::storage::RegionNumber;
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
|
||||
@@ -286,14 +286,7 @@ impl CreateTablesData {
|
||||
.flat_map(|(task, table_id)| {
|
||||
if table_id.is_none() {
|
||||
let table_info = task.table_info.clone();
|
||||
let region_ids = self
|
||||
.physical_region_numbers
|
||||
.iter()
|
||||
.map(|region_number| {
|
||||
RegionId::new(table_info.ident.table_id, *region_number)
|
||||
})
|
||||
.collect();
|
||||
let table_route = TableRouteValue::logical(self.physical_table_id, region_ids);
|
||||
let table_route = TableRouteValue::logical(self.physical_table_id);
|
||||
Some((table_info, table_route))
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -22,7 +22,7 @@ use store_api::storage::{RegionId, TableId};
|
||||
use table::metadata::RawTableInfo;
|
||||
|
||||
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
|
||||
use crate::ddl::create_table_template::{
|
||||
use crate::ddl::create_table::template::{
|
||||
CreateRequestBuilder, build_template, build_template_from_raw_table_info,
|
||||
};
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
@@ -97,7 +97,7 @@ pub fn create_region_request_builder(
|
||||
|
||||
/// Builds a [CreateRequestBuilder] from a [RawTableInfo].
|
||||
///
|
||||
/// Note: **This method is only used for creating logical tables.**
|
||||
/// Note: This function is primarily intended for creating logical tables or allocating placeholder regions.
|
||||
pub fn create_region_request_builder_from_raw_table_info(
|
||||
raw_table_info: &RawTableInfo,
|
||||
physical_table_id: TableId,
|
||||
|
||||
@@ -12,74 +12,99 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod executor;
|
||||
pub mod template;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::region::region_request::Body as PbRegionRequest;
|
||||
use api::v1::region::{RegionRequest, RegionRequestHeader};
|
||||
use api::v1::CreateTableExpr;
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::error::{
|
||||
ExternalSnafu, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
|
||||
};
|
||||
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, ProcedureId, Status};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{info, warn};
|
||||
use futures::future::join_all;
|
||||
use common_telemetry::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::metric_engine_consts::TABLE_COLUMN_METADATA_EXTENSION_KEY;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use store_api::storage::RegionNumber;
|
||||
use strum::AsRefStr;
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_name::TableName;
|
||||
use table::table_reference::TableReference;
|
||||
pub(crate) use template::{CreateRequestBuilder, build_template_from_raw_table_info};
|
||||
|
||||
use crate::ddl::create_table_template::{CreateRequestBuilder, build_template};
|
||||
use crate::ddl::utils::raw_table_info::update_table_info_column_ids;
|
||||
use crate::ddl::utils::{
|
||||
add_peer_context_if_needed, convert_region_routes_to_detecting_regions,
|
||||
extract_column_metadatas, map_to_procedure_error, region_storage_path,
|
||||
};
|
||||
use crate::ddl::create_table::executor::CreateTableExecutor;
|
||||
use crate::ddl::create_table::template::build_template;
|
||||
use crate::ddl::utils::map_to_procedure_error;
|
||||
use crate::ddl::{DdlContext, TableMetadata};
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
|
||||
use crate::key::table_route::PhysicalTableRouteValue;
|
||||
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
|
||||
use crate::metrics;
|
||||
use crate::region_keeper::OperatingRegionGuard;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
use crate::rpc::router::{
|
||||
RegionRoute, find_leader_regions, find_leaders, operating_leader_regions,
|
||||
};
|
||||
use crate::rpc::router::{RegionRoute, operating_leader_regions};
|
||||
|
||||
pub struct CreateTableProcedure {
|
||||
pub context: DdlContext,
|
||||
pub creator: TableCreator,
|
||||
/// The serializable data.
|
||||
pub data: CreateTableData,
|
||||
/// The guards of opening.
|
||||
pub opening_regions: Vec<OperatingRegionGuard>,
|
||||
/// The executor of the procedure.
|
||||
pub executor: CreateTableExecutor,
|
||||
}
|
||||
|
||||
fn build_executor_from_create_table_data(
|
||||
create_table_expr: &CreateTableExpr,
|
||||
) -> Result<CreateTableExecutor> {
|
||||
let template = build_template(create_table_expr)?;
|
||||
let builder = CreateRequestBuilder::new(template, None);
|
||||
let table_name = TableName::new(
|
||||
create_table_expr.catalog_name.clone(),
|
||||
create_table_expr.schema_name.clone(),
|
||||
create_table_expr.table_name.clone(),
|
||||
);
|
||||
let executor =
|
||||
CreateTableExecutor::new(table_name, create_table_expr.create_if_not_exists, builder);
|
||||
Ok(executor)
|
||||
}
|
||||
|
||||
impl CreateTableProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateTable";
|
||||
|
||||
pub fn new(task: CreateTableTask, context: DdlContext) -> Self {
|
||||
Self {
|
||||
pub fn new(task: CreateTableTask, context: DdlContext) -> Result<Self> {
|
||||
let executor = build_executor_from_create_table_data(&task.create_table)?;
|
||||
|
||||
Ok(Self {
|
||||
context,
|
||||
creator: TableCreator::new(task),
|
||||
}
|
||||
data: CreateTableData::new(task),
|
||||
opening_regions: vec![],
|
||||
executor,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
|
||||
let data = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let data: CreateTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
|
||||
let create_table_expr = &data.task.create_table;
|
||||
let executor = build_executor_from_create_table_data(create_table_expr)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu {
|
||||
clean_poisons: false,
|
||||
})?;
|
||||
|
||||
Ok(CreateTableProcedure {
|
||||
context,
|
||||
creator: TableCreator {
|
||||
data,
|
||||
opening_regions: vec![],
|
||||
},
|
||||
data,
|
||||
opening_regions: vec![],
|
||||
executor,
|
||||
})
|
||||
}
|
||||
|
||||
fn table_info(&self) -> &RawTableInfo {
|
||||
&self.creator.data.task.table_info
|
||||
&self.data.task.table_info
|
||||
}
|
||||
|
||||
pub(crate) fn table_id(&self) -> TableId {
|
||||
@@ -87,8 +112,7 @@ impl CreateTableProcedure {
|
||||
}
|
||||
|
||||
fn region_wal_options(&self) -> Result<&HashMap<RegionNumber, String>> {
|
||||
self.creator
|
||||
.data
|
||||
self.data
|
||||
.region_wal_options
|
||||
.as_ref()
|
||||
.context(error::UnexpectedSnafu {
|
||||
@@ -97,8 +121,7 @@ impl CreateTableProcedure {
|
||||
}
|
||||
|
||||
fn table_route(&self) -> Result<&PhysicalTableRouteValue> {
|
||||
self.creator
|
||||
.data
|
||||
self.data
|
||||
.table_route
|
||||
.as_ref()
|
||||
.context(error::UnexpectedSnafu {
|
||||
@@ -106,17 +129,6 @@ impl CreateTableProcedure {
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub fn set_allocated_metadata(
|
||||
&mut self,
|
||||
table_id: TableId,
|
||||
table_route: PhysicalTableRouteValue,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) {
|
||||
self.creator
|
||||
.set_allocated_metadata(table_id, table_route, region_wal_options)
|
||||
}
|
||||
|
||||
/// On the prepare step, it performs:
|
||||
/// - Checks whether the table exists.
|
||||
/// - Allocates the table id.
|
||||
@@ -125,31 +137,16 @@ impl CreateTableProcedure {
|
||||
/// - TableName exists and `create_if_not_exists` is false.
|
||||
/// - Failed to allocate [TableMetadata].
|
||||
pub(crate) async fn on_prepare(&mut self) -> Result<Status> {
|
||||
let expr = &self.creator.data.task.create_table;
|
||||
let table_name_value = self
|
||||
.context
|
||||
.table_metadata_manager
|
||||
.table_name_manager()
|
||||
.get(TableNameKey::new(
|
||||
&expr.catalog_name,
|
||||
&expr.schema_name,
|
||||
&expr.table_name,
|
||||
))
|
||||
let table_id = self
|
||||
.executor
|
||||
.on_prepare(&self.context.table_metadata_manager)
|
||||
.await?;
|
||||
|
||||
if let Some(value) = table_name_value {
|
||||
ensure!(
|
||||
expr.create_if_not_exists,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: self.creator.data.table_ref().to_string(),
|
||||
}
|
||||
);
|
||||
|
||||
let table_id = value.table_id();
|
||||
// Return the table id if the table already exists.
|
||||
if let Some(table_id) = table_id {
|
||||
return Ok(Status::done_with_output(table_id));
|
||||
}
|
||||
|
||||
self.creator.data.state = CreateTableState::DatanodeCreateRegions;
|
||||
self.data.state = CreateTableState::DatanodeCreateRegions;
|
||||
let TableMetadata {
|
||||
table_id,
|
||||
table_route,
|
||||
@@ -157,23 +154,13 @@ impl CreateTableProcedure {
|
||||
} = self
|
||||
.context
|
||||
.table_metadata_allocator
|
||||
.create(&self.creator.data.task)
|
||||
.create(&self.data.task)
|
||||
.await?;
|
||||
self.creator
|
||||
.set_allocated_metadata(table_id, table_route, region_wal_options);
|
||||
self.set_allocated_metadata(table_id, table_route, region_wal_options);
|
||||
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
pub fn new_region_request_builder(
|
||||
&self,
|
||||
physical_table_id: Option<TableId>,
|
||||
) -> Result<CreateRequestBuilder> {
|
||||
let create_table_expr = &self.creator.data.task.create_table;
|
||||
let template = build_template(create_table_expr)?;
|
||||
Ok(CreateRequestBuilder::new(template, physical_table_id))
|
||||
}
|
||||
|
||||
/// Creates regions on datanodes
|
||||
///
|
||||
/// Abort(non-retry):
|
||||
@@ -187,90 +174,29 @@ impl CreateTableProcedure {
|
||||
/// - [Code::Unavailable](tonic::status::Code::Unavailable)
|
||||
pub async fn on_datanode_create_regions(&mut self) -> Result<Status> {
|
||||
let table_route = self.table_route()?.clone();
|
||||
let request_builder = self.new_region_request_builder(None)?;
|
||||
// Registers opening regions
|
||||
let guards = self
|
||||
.creator
|
||||
.register_opening_regions(&self.context, &table_route.region_routes)?;
|
||||
let guards = self.register_opening_regions(&self.context, &table_route.region_routes)?;
|
||||
if !guards.is_empty() {
|
||||
self.creator.opening_regions = guards;
|
||||
self.opening_regions = guards;
|
||||
}
|
||||
self.create_regions(&table_route.region_routes, request_builder)
|
||||
.await
|
||||
self.create_regions(&table_route.region_routes).await
|
||||
}
|
||||
|
||||
async fn create_regions(
|
||||
&mut self,
|
||||
region_routes: &[RegionRoute],
|
||||
request_builder: CreateRequestBuilder,
|
||||
) -> Result<Status> {
|
||||
let create_table_data = &self.creator.data;
|
||||
// Safety: the region_wal_options must be allocated
|
||||
async fn create_regions(&mut self, region_routes: &[RegionRoute]) -> Result<Status> {
|
||||
let table_id = self.table_id();
|
||||
let region_wal_options = self.region_wal_options()?;
|
||||
let create_table_expr = &create_table_data.task.create_table;
|
||||
let catalog = &create_table_expr.catalog_name;
|
||||
let schema = &create_table_expr.schema_name;
|
||||
let storage_path = region_storage_path(catalog, schema);
|
||||
let leaders = find_leaders(region_routes);
|
||||
let mut create_region_tasks = Vec::with_capacity(leaders.len());
|
||||
let column_metadatas = self
|
||||
.executor
|
||||
.on_create_regions(
|
||||
&self.context.node_manager,
|
||||
table_id,
|
||||
region_routes,
|
||||
region_wal_options,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let partition_exprs = region_routes
|
||||
.iter()
|
||||
.map(|r| (r.region.id.region_number(), r.region.partition_expr()))
|
||||
.collect();
|
||||
|
||||
for datanode in leaders {
|
||||
let requester = self.context.node_manager.datanode(&datanode).await;
|
||||
|
||||
let regions = find_leader_regions(region_routes, &datanode);
|
||||
let mut requests = Vec::with_capacity(regions.len());
|
||||
for region_number in regions {
|
||||
let region_id = RegionId::new(self.table_id(), region_number);
|
||||
let create_region_request = request_builder.build_one(
|
||||
region_id,
|
||||
storage_path.clone(),
|
||||
region_wal_options,
|
||||
&partition_exprs,
|
||||
);
|
||||
requests.push(PbRegionRequest::Create(create_region_request));
|
||||
}
|
||||
|
||||
for request in requests {
|
||||
let request = RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(request),
|
||||
};
|
||||
|
||||
let datanode = datanode.clone();
|
||||
let requester = requester.clone();
|
||||
create_region_tasks.push(async move {
|
||||
requester
|
||||
.handle(request)
|
||||
.await
|
||||
.map_err(add_peer_context_if_needed(datanode))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut results = join_all(create_region_tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
if let Some(column_metadatas) =
|
||||
extract_column_metadatas(&mut results, TABLE_COLUMN_METADATA_EXTENSION_KEY)?
|
||||
{
|
||||
self.creator.data.column_metadatas = column_metadatas;
|
||||
} else {
|
||||
warn!(
|
||||
"creating table result doesn't contains extension key `{TABLE_COLUMN_METADATA_EXTENSION_KEY}`,leaving the table's column metadata unchanged"
|
||||
);
|
||||
}
|
||||
|
||||
self.creator.data.state = CreateTableState::CreateMetadata;
|
||||
self.data.column_metadatas = column_metadatas;
|
||||
self.data.state = CreateTableState::CreateMetadata;
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
@@ -280,107 +206,33 @@ impl CreateTableProcedure {
|
||||
/// - Failed to create table metadata.
|
||||
async fn on_create_metadata(&mut self, pid: ProcedureId) -> Result<Status> {
|
||||
let table_id = self.table_id();
|
||||
let table_ref = self.creator.data.table_ref();
|
||||
let table_ref = self.data.table_ref();
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
|
||||
let mut raw_table_info = self.table_info().clone();
|
||||
if !self.creator.data.column_metadatas.is_empty() {
|
||||
update_table_info_column_ids(&mut raw_table_info, &self.creator.data.column_metadatas);
|
||||
}
|
||||
let raw_table_info = self.table_info().clone();
|
||||
// Safety: the region_wal_options must be allocated.
|
||||
let region_wal_options = self.region_wal_options()?.clone();
|
||||
// Safety: the table_route must be allocated.
|
||||
let physical_table_route = self.table_route()?.clone();
|
||||
let detecting_regions =
|
||||
convert_region_routes_to_detecting_regions(&physical_table_route.region_routes);
|
||||
let table_route = TableRouteValue::Physical(physical_table_route);
|
||||
manager
|
||||
.create_table_metadata(raw_table_info, table_route, region_wal_options)
|
||||
self.executor
|
||||
.on_create_metadata(
|
||||
manager,
|
||||
&self.context.region_failure_detector_controller,
|
||||
raw_table_info,
|
||||
&self.data.column_metadatas,
|
||||
physical_table_route,
|
||||
region_wal_options,
|
||||
)
|
||||
.await?;
|
||||
self.context
|
||||
.register_failure_detectors(detecting_regions)
|
||||
.await;
|
||||
|
||||
info!(
|
||||
"Successfully created table: {}, table_id: {}, procedure_id: {}",
|
||||
table_ref, table_id, pid
|
||||
);
|
||||
|
||||
self.creator.opening_regions.clear();
|
||||
self.opening_regions.clear();
|
||||
Ok(Status::done_with_output(table_id))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Procedure for CreateTableProcedure {
|
||||
fn type_name(&self) -> &str {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
fn recover(&mut self) -> ProcedureResult<()> {
|
||||
// Only registers regions if the table route is allocated.
|
||||
if let Some(x) = &self.creator.data.table_route {
|
||||
self.creator.opening_regions = self
|
||||
.creator
|
||||
.register_opening_regions(&self.context, &x.region_routes)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu {
|
||||
clean_poisons: false,
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn execute(&mut self, ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let state = &self.creator.data.state;
|
||||
|
||||
let _timer = metrics::METRIC_META_PROCEDURE_CREATE_TABLE
|
||||
.with_label_values(&[state.as_ref()])
|
||||
.start_timer();
|
||||
|
||||
match state {
|
||||
CreateTableState::Prepare => self.on_prepare().await,
|
||||
CreateTableState::DatanodeCreateRegions => self.on_datanode_create_regions().await,
|
||||
CreateTableState::CreateMetadata => self.on_create_metadata(ctx.procedure_id).await,
|
||||
}
|
||||
.map_err(map_to_procedure_error)
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
serde_json::to_string(&self.creator.data).context(ToJsonSnafu)
|
||||
}
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let table_ref = &self.creator.data.table_ref();
|
||||
|
||||
LockKey::new(vec![
|
||||
CatalogLock::Read(table_ref.catalog).into(),
|
||||
SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
|
||||
TableNameLock::new(table_ref.catalog, table_ref.schema, table_ref.table).into(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TableCreator {
|
||||
/// The serializable data.
|
||||
pub data: CreateTableData,
|
||||
/// The guards of opening.
|
||||
pub opening_regions: Vec<OperatingRegionGuard>,
|
||||
}
|
||||
|
||||
impl TableCreator {
|
||||
pub fn new(task: CreateTableTask) -> Self {
|
||||
Self {
|
||||
data: CreateTableData {
|
||||
state: CreateTableState::Prepare,
|
||||
column_metadatas: vec![],
|
||||
task,
|
||||
table_route: None,
|
||||
region_wal_options: None,
|
||||
},
|
||||
opening_regions: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers and returns the guards of the opening region if they don't exist.
|
||||
fn register_opening_regions(
|
||||
@@ -389,7 +241,6 @@ impl TableCreator {
|
||||
region_routes: &[RegionRoute],
|
||||
) -> Result<Vec<OperatingRegionGuard>> {
|
||||
let opening_regions = operating_leader_regions(region_routes);
|
||||
|
||||
if self.opening_regions.len() == opening_regions.len() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
@@ -409,7 +260,7 @@ impl TableCreator {
|
||||
Ok(opening_region_guards)
|
||||
}
|
||||
|
||||
fn set_allocated_metadata(
|
||||
pub fn set_allocated_metadata(
|
||||
&mut self,
|
||||
table_id: TableId,
|
||||
table_route: PhysicalTableRouteValue,
|
||||
@@ -421,6 +272,56 @@ impl TableCreator {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Procedure for CreateTableProcedure {
|
||||
fn type_name(&self) -> &str {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
fn recover(&mut self) -> ProcedureResult<()> {
|
||||
// Only registers regions if the table route is allocated.
|
||||
if let Some(x) = &self.data.table_route {
|
||||
self.opening_regions = self
|
||||
.register_opening_regions(&self.context, &x.region_routes)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu {
|
||||
clean_poisons: false,
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn execute(&mut self, ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let state = &self.data.state;
|
||||
|
||||
let _timer = metrics::METRIC_META_PROCEDURE_CREATE_TABLE
|
||||
.with_label_values(&[state.as_ref()])
|
||||
.start_timer();
|
||||
|
||||
match state {
|
||||
CreateTableState::Prepare => self.on_prepare().await,
|
||||
CreateTableState::DatanodeCreateRegions => self.on_datanode_create_regions().await,
|
||||
CreateTableState::CreateMetadata => self.on_create_metadata(ctx.procedure_id).await,
|
||||
}
|
||||
.map_err(map_to_procedure_error)
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
serde_json::to_string(&self.data).context(ToJsonSnafu)
|
||||
}
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let table_ref = &self.data.table_ref();
|
||||
|
||||
LockKey::new(vec![
|
||||
CatalogLock::Read(table_ref.catalog).into(),
|
||||
SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
|
||||
TableNameLock::new(table_ref.catalog, table_ref.schema, table_ref.table).into(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr, PartialEq)]
|
||||
pub enum CreateTableState {
|
||||
/// Prepares to create the table
|
||||
@@ -444,6 +345,16 @@ pub struct CreateTableData {
|
||||
}
|
||||
|
||||
impl CreateTableData {
|
||||
pub fn new(task: CreateTableTask) -> Self {
|
||||
CreateTableData {
|
||||
state: CreateTableState::Prepare,
|
||||
column_metadatas: vec![],
|
||||
task,
|
||||
table_route: None,
|
||||
region_wal_options: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn table_ref(&self) -> TableReference<'_> {
|
||||
self.task.table_ref()
|
||||
}
|
||||
|
||||
203
src/common/meta/src/ddl/create_table/executor.rs
Normal file
203
src/common/meta/src/ddl/create_table/executor.rs
Normal file
@@ -0,0 +1,203 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::region::region_request::Body as PbRegionRequest;
|
||||
use api::v1::region::{RegionRequest, RegionRequestHeader};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::warn;
|
||||
use futures::future::join_all;
|
||||
use snafu::ensure;
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::metric_engine_consts::TABLE_COLUMN_METADATA_EXTENSION_KEY;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
use table::table_name::TableName;
|
||||
|
||||
use crate::ddl::utils::raw_table_info::update_table_info_column_ids;
|
||||
use crate::ddl::utils::{
|
||||
add_peer_context_if_needed, convert_region_routes_to_detecting_regions,
|
||||
extract_column_metadatas, region_storage_path,
|
||||
};
|
||||
use crate::ddl::{CreateRequestBuilder, RegionFailureDetectorControllerRef};
|
||||
use crate::error::{self, Result};
|
||||
use crate::key::TableMetadataManagerRef;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
|
||||
use crate::node_manager::NodeManagerRef;
|
||||
use crate::rpc::router::{RegionRoute, find_leader_regions, find_leaders};
|
||||
|
||||
/// [CreateTableExecutor] performs:
|
||||
/// - Creates the metadata of the table.
|
||||
/// - Creates the regions on the Datanode nodes.
|
||||
pub struct CreateTableExecutor {
|
||||
create_if_not_exists: bool,
|
||||
table_name: TableName,
|
||||
builder: CreateRequestBuilder,
|
||||
}
|
||||
|
||||
impl CreateTableExecutor {
|
||||
/// Creates a new [`CreateTableExecutor`].
|
||||
pub fn new(
|
||||
table_name: TableName,
|
||||
create_if_not_exists: bool,
|
||||
builder: CreateRequestBuilder,
|
||||
) -> Self {
|
||||
Self {
|
||||
create_if_not_exists,
|
||||
table_name,
|
||||
builder,
|
||||
}
|
||||
}
|
||||
|
||||
/// On the prepare step, it performs:
|
||||
/// - Checks whether the table exists.
|
||||
/// - Returns the table id if the table exists.
|
||||
///
|
||||
/// Abort(non-retry):
|
||||
/// - Table exists and `create_if_not_exists` is `false`.
|
||||
/// - Failed to get the table name value.
|
||||
pub async fn on_prepare(
|
||||
&self,
|
||||
table_metadata_manager: &TableMetadataManagerRef,
|
||||
) -> Result<Option<TableId>> {
|
||||
let table_name_value = table_metadata_manager
|
||||
.table_name_manager()
|
||||
.get(TableNameKey::new(
|
||||
&self.table_name.catalog_name,
|
||||
&self.table_name.schema_name,
|
||||
&self.table_name.table_name,
|
||||
))
|
||||
.await?;
|
||||
|
||||
if let Some(value) = table_name_value {
|
||||
ensure!(
|
||||
self.create_if_not_exists,
|
||||
error::TableAlreadyExistsSnafu {
|
||||
table_name: self.table_name.to_string(),
|
||||
}
|
||||
);
|
||||
|
||||
return Ok(Some(value.table_id()));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub async fn on_create_regions(
|
||||
&self,
|
||||
node_manager: &NodeManagerRef,
|
||||
table_id: TableId,
|
||||
region_routes: &[RegionRoute],
|
||||
region_wal_options: &HashMap<RegionNumber, String>,
|
||||
) -> Result<Vec<ColumnMetadata>> {
|
||||
let storage_path =
|
||||
region_storage_path(&self.table_name.catalog_name, &self.table_name.schema_name);
|
||||
let leaders = find_leaders(region_routes);
|
||||
let mut create_region_tasks = Vec::with_capacity(leaders.len());
|
||||
let partition_exprs = region_routes
|
||||
.iter()
|
||||
.map(|r| (r.region.id.region_number(), r.region.partition_expr()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for datanode in leaders {
|
||||
let requester = node_manager.datanode(&datanode).await;
|
||||
|
||||
let regions = find_leader_regions(region_routes, &datanode);
|
||||
let mut requests = Vec::with_capacity(regions.len());
|
||||
for region_number in regions {
|
||||
let region_id = RegionId::new(table_id, region_number);
|
||||
let create_region_request = self.builder.build_one(
|
||||
region_id,
|
||||
storage_path.clone(),
|
||||
region_wal_options,
|
||||
&partition_exprs,
|
||||
);
|
||||
requests.push(PbRegionRequest::Create(create_region_request));
|
||||
}
|
||||
|
||||
for request in requests {
|
||||
let request = RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(request),
|
||||
};
|
||||
|
||||
let datanode = datanode.clone();
|
||||
let requester = requester.clone();
|
||||
create_region_tasks.push(async move {
|
||||
requester
|
||||
.handle(request)
|
||||
.await
|
||||
.map_err(add_peer_context_if_needed(datanode))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut results = join_all(create_region_tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let column_metadatas = if let Some(column_metadatas) =
|
||||
extract_column_metadatas(&mut results, TABLE_COLUMN_METADATA_EXTENSION_KEY)?
|
||||
{
|
||||
column_metadatas
|
||||
} else {
|
||||
warn!(
|
||||
"creating table result doesn't contains extension key `{TABLE_COLUMN_METADATA_EXTENSION_KEY}`,leaving the table's column metadata unchanged"
|
||||
);
|
||||
vec![]
|
||||
};
|
||||
|
||||
Ok(column_metadatas)
|
||||
}
|
||||
|
||||
/// Creates table metadata
|
||||
///
|
||||
/// Abort(non-retry):
|
||||
/// - Failed to create table metadata.
|
||||
pub async fn on_create_metadata(
|
||||
&self,
|
||||
table_metadata_manager: &TableMetadataManagerRef,
|
||||
region_failure_detector_controller: &RegionFailureDetectorControllerRef,
|
||||
mut raw_table_info: RawTableInfo,
|
||||
column_metadatas: &[ColumnMetadata],
|
||||
table_route: PhysicalTableRouteValue,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) -> Result<()> {
|
||||
if !column_metadatas.is_empty() {
|
||||
update_table_info_column_ids(&mut raw_table_info, column_metadatas);
|
||||
}
|
||||
let detecting_regions =
|
||||
convert_region_routes_to_detecting_regions(&table_route.region_routes);
|
||||
let table_route = TableRouteValue::Physical(table_route);
|
||||
table_metadata_manager
|
||||
.create_table_metadata(raw_table_info, table_route, region_wal_options)
|
||||
.await?;
|
||||
region_failure_detector_controller
|
||||
.register_failure_detectors(detecting_regions)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the builder of the executor.
|
||||
pub fn builder(&self) -> &CreateRequestBuilder {
|
||||
&self.builder
|
||||
}
|
||||
}
|
||||
@@ -20,19 +20,17 @@ use api::v1::region::{CreateRequest, RegionColumnDef};
|
||||
use api::v1::{ColumnDef, CreateTableExpr, SemanticType};
|
||||
use common_telemetry::warn;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
|
||||
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::{RawTableInfo, TableId};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::wal_options_allocator::prepare_wal_options;
|
||||
use crate::wal_provider::prepare_wal_options;
|
||||
|
||||
/// Builds a [CreateRequest] from a [RawTableInfo].
|
||||
/// Constructs a [CreateRequest] based on the provided [RawTableInfo].
|
||||
///
|
||||
/// Note: **This method is only used for creating logical tables.**
|
||||
pub(crate) fn build_template_from_raw_table_info(
|
||||
raw_table_info: &RawTableInfo,
|
||||
) -> Result<CreateRequest> {
|
||||
/// Note: This function is primarily intended for creating logical tables or allocating placeholder regions.
|
||||
pub fn build_template_from_raw_table_info(raw_table_info: &RawTableInfo) -> Result<CreateRequest> {
|
||||
let primary_key_indices = &raw_table_info.meta.primary_key_indices;
|
||||
let column_defs = raw_table_info
|
||||
.meta
|
||||
@@ -57,7 +55,7 @@ pub(crate) fn build_template_from_raw_table_info(
|
||||
let options = HashMap::from(&raw_table_info.meta.options);
|
||||
let template = CreateRequest {
|
||||
region_id: 0,
|
||||
engine: METRIC_ENGINE_NAME.to_string(),
|
||||
engine: raw_table_info.meta.engine.clone(),
|
||||
column_defs,
|
||||
primary_key: primary_key_indices.iter().map(|i| *i as u32).collect(),
|
||||
path: String::new(),
|
||||
@@ -138,7 +136,7 @@ pub struct CreateRequestBuilder {
|
||||
}
|
||||
|
||||
impl CreateRequestBuilder {
|
||||
pub(crate) fn new(template: CreateRequest, physical_table_id: Option<TableId>) -> Self {
|
||||
pub fn new(template: CreateRequest, physical_table_id: Option<TableId>) -> Self {
|
||||
Self {
|
||||
template,
|
||||
physical_table_id,
|
||||
@@ -120,7 +120,13 @@ impl State for DropDatabaseExecutor {
|
||||
.await?;
|
||||
executor.invalidate_table_cache(ddl_ctx).await?;
|
||||
executor
|
||||
.on_drop_regions(ddl_ctx, &self.physical_region_routes, true)
|
||||
.on_drop_regions(
|
||||
&ddl_ctx.node_manager,
|
||||
&ddl_ctx.leader_region_registry,
|
||||
&self.physical_region_routes,
|
||||
true,
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
info!("Table: {}({}) is dropped", self.table_name, self.table_id);
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod executor;
|
||||
pub mod executor;
|
||||
mod metadata;
|
||||
|
||||
use std::collections::HashMap;
|
||||
@@ -156,7 +156,13 @@ impl DropTableProcedure {
|
||||
|
||||
pub async fn on_datanode_drop_regions(&mut self) -> Result<Status> {
|
||||
self.executor
|
||||
.on_drop_regions(&self.context, &self.data.physical_region_routes, false)
|
||||
.on_drop_regions(
|
||||
&self.context.node_manager,
|
||||
&self.context.leader_region_registry,
|
||||
&self.data.physical_region_routes,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
self.data.state = DropTableState::DeleteTombstone;
|
||||
Ok(Status::executing(true))
|
||||
|
||||
@@ -36,6 +36,8 @@ use crate::error::{self, Result};
|
||||
use crate::instruction::CacheIdent;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::table_route::TableRouteValue;
|
||||
use crate::node_manager::NodeManagerRef;
|
||||
use crate::region_registry::LeaderRegionRegistryRef;
|
||||
use crate::rpc::router::{
|
||||
RegionRoute, find_follower_regions, find_followers, find_leader_regions, find_leaders,
|
||||
operating_leader_regions,
|
||||
@@ -212,16 +214,18 @@ impl DropTableExecutor {
|
||||
/// Drops region on datanode.
|
||||
pub async fn on_drop_regions(
|
||||
&self,
|
||||
ctx: &DdlContext,
|
||||
node_manager: &NodeManagerRef,
|
||||
leader_region_registry: &LeaderRegionRegistryRef,
|
||||
region_routes: &[RegionRoute],
|
||||
fast_path: bool,
|
||||
force: bool,
|
||||
) -> Result<()> {
|
||||
// Drops leader regions on datanodes.
|
||||
let leaders = find_leaders(region_routes);
|
||||
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
|
||||
let table_id = self.table_id;
|
||||
for datanode in leaders {
|
||||
let requester = ctx.node_manager.datanode(&datanode).await;
|
||||
let requester = node_manager.datanode(&datanode).await;
|
||||
let regions = find_leader_regions(region_routes, &datanode);
|
||||
let region_ids = regions
|
||||
.iter()
|
||||
@@ -238,6 +242,7 @@ impl DropTableExecutor {
|
||||
body: Some(region_request::Body::Drop(PbDropRegionRequest {
|
||||
region_id: region_id.as_u64(),
|
||||
fast_path,
|
||||
force,
|
||||
})),
|
||||
};
|
||||
let datanode = datanode.clone();
|
||||
@@ -262,7 +267,7 @@ impl DropTableExecutor {
|
||||
let followers = find_followers(region_routes);
|
||||
let mut close_region_tasks = Vec::with_capacity(followers.len());
|
||||
for datanode in followers {
|
||||
let requester = ctx.node_manager.datanode(&datanode).await;
|
||||
let requester = node_manager.datanode(&datanode).await;
|
||||
let regions = find_follower_regions(region_routes, &datanode);
|
||||
let region_ids = regions
|
||||
.iter()
|
||||
@@ -307,8 +312,7 @@ impl DropTableExecutor {
|
||||
|
||||
// Deletes the leader region from registry.
|
||||
let region_ids = operating_leader_regions(region_routes);
|
||||
ctx.leader_region_registry
|
||||
.batch_delete(region_ids.into_iter().map(|(region_id, _)| region_id));
|
||||
leader_region_registry.batch_delete(region_ids.into_iter().map(|(region_id, _)| region_id));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -17,47 +17,47 @@ use std::sync::Arc;
|
||||
|
||||
use common_telemetry::{debug, info};
|
||||
use snafu::ensure;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
use store_api::storage::{RegionNumber, TableId};
|
||||
|
||||
use crate::ddl::TableMetadata;
|
||||
use crate::ddl::allocator::region_routes::RegionRoutesAllocatorRef;
|
||||
use crate::ddl::allocator::resource_id::ResourceIdAllocatorRef;
|
||||
use crate::ddl::allocator::wal_options::WalOptionsAllocatorRef;
|
||||
use crate::error::{Result, UnsupportedSnafu};
|
||||
use crate::key::table_route::PhysicalTableRouteValue;
|
||||
use crate::peer::{NoopPeerAllocator, PeerAllocatorRef};
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::sequence::SequenceRef;
|
||||
use crate::wal_options_allocator::{WalOptionsAllocatorRef, allocate_region_wal_options};
|
||||
|
||||
pub type TableMetadataAllocatorRef = Arc<TableMetadataAllocator>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TableMetadataAllocator {
|
||||
table_id_sequence: SequenceRef,
|
||||
table_id_allocator: ResourceIdAllocatorRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
peer_allocator: PeerAllocatorRef,
|
||||
region_routes_allocator: RegionRoutesAllocatorRef,
|
||||
}
|
||||
|
||||
impl TableMetadataAllocator {
|
||||
pub fn new(
|
||||
table_id_sequence: SequenceRef,
|
||||
table_id_allocator: ResourceIdAllocatorRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
) -> Self {
|
||||
Self::with_peer_allocator(
|
||||
table_id_sequence,
|
||||
table_id_allocator,
|
||||
wal_options_allocator,
|
||||
Arc::new(NoopPeerAllocator),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn with_peer_allocator(
|
||||
table_id_sequence: SequenceRef,
|
||||
table_id_allocator: ResourceIdAllocatorRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
peer_allocator: PeerAllocatorRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
table_id_sequence,
|
||||
table_id_allocator,
|
||||
wal_options_allocator,
|
||||
peer_allocator,
|
||||
region_routes_allocator: Arc::new(peer_allocator) as _,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ impl TableMetadataAllocator {
|
||||
|
||||
ensure!(
|
||||
!self
|
||||
.table_id_sequence
|
||||
.table_id_allocator
|
||||
.min_max()
|
||||
.await
|
||||
.contains(&(table_id as u64)),
|
||||
@@ -89,65 +89,35 @@ impl TableMetadataAllocator {
|
||||
|
||||
table_id
|
||||
} else {
|
||||
self.table_id_sequence.next().await? as TableId
|
||||
self.table_id_allocator.next().await? as TableId
|
||||
};
|
||||
Ok(table_id)
|
||||
}
|
||||
|
||||
fn create_wal_options(
|
||||
async fn create_wal_options(
|
||||
&self,
|
||||
table_route: &PhysicalTableRouteValue,
|
||||
region_numbers: &[RegionNumber],
|
||||
skip_wal: bool,
|
||||
) -> Result<HashMap<RegionNumber, String>> {
|
||||
let region_numbers = table_route
|
||||
.region_routes
|
||||
.iter()
|
||||
.map(|route| route.region.id.region_number())
|
||||
.collect();
|
||||
allocate_region_wal_options(region_numbers, &self.wal_options_allocator, skip_wal)
|
||||
self.wal_options_allocator
|
||||
.allocate(region_numbers, skip_wal)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn create_table_route(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
task: &CreateTableTask,
|
||||
partition_exprs: &[&str],
|
||||
) -> Result<PhysicalTableRouteValue> {
|
||||
let regions = task.partitions.len().max(1);
|
||||
let peers = self.peer_allocator.alloc(regions).await?;
|
||||
debug!("Allocated peers {:?} for table {}", peers, table_id);
|
||||
|
||||
let mut region_routes = task
|
||||
.partitions
|
||||
let region_number_and_partition_exprs = partition_exprs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, partition)| {
|
||||
let region = Region {
|
||||
id: RegionId::new(table_id, i as u32),
|
||||
partition_expr: partition.expression.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let peer = peers[i % peers.len()].clone();
|
||||
|
||||
RegionRoute {
|
||||
region,
|
||||
leader_peer: Some(peer),
|
||||
..Default::default()
|
||||
}
|
||||
})
|
||||
.map(|(i, partition)| (i as u32, *partition))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// If the table has no partitions, we need to create a default region.
|
||||
if region_routes.is_empty() {
|
||||
region_routes.push(RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 0),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(peers[0].clone()),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
let region_routes = self
|
||||
.region_routes_allocator
|
||||
.allocate(table_id, ®ion_number_and_partition_exprs)
|
||||
.await?;
|
||||
|
||||
Ok(PhysicalTableRouteValue::new(region_routes))
|
||||
}
|
||||
@@ -164,10 +134,20 @@ impl TableMetadataAllocator {
|
||||
|
||||
pub async fn create(&self, task: &CreateTableTask) -> Result<TableMetadata> {
|
||||
let table_id = self.allocate_table_id(&task.create_table.table_id).await?;
|
||||
let table_route = self.create_table_route(table_id, task).await?;
|
||||
|
||||
let region_wal_options =
|
||||
self.create_wal_options(&table_route, task.table_info.meta.options.skip_wal)?;
|
||||
let partition_exprs = task
|
||||
.partitions
|
||||
.iter()
|
||||
.map(|p| p.expression.as_str())
|
||||
.collect::<Vec<_>>();
|
||||
let table_route = self.create_table_route(table_id, &partition_exprs).await?;
|
||||
let region_numbers = table_route
|
||||
.region_routes
|
||||
.iter()
|
||||
.map(|route| route.region.id.region_number())
|
||||
.collect::<Vec<_>>();
|
||||
let region_wal_options = self
|
||||
.create_wal_options(®ion_numbers, task.table_info.meta.options.skip_wal)
|
||||
.await?;
|
||||
|
||||
debug!(
|
||||
"Allocated region wal options {:?} for table {}",
|
||||
@@ -181,7 +161,18 @@ impl TableMetadataAllocator {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn table_id_sequence(&self) -> SequenceRef {
|
||||
self.table_id_sequence.clone()
|
||||
/// Returns the table id allocator.
|
||||
pub fn table_id_allocator(&self) -> ResourceIdAllocatorRef {
|
||||
self.table_id_allocator.clone()
|
||||
}
|
||||
|
||||
/// Returns the wal options allocator.
|
||||
pub fn wal_options_allocator(&self) -> WalOptionsAllocatorRef {
|
||||
self.wal_options_allocator.clone()
|
||||
}
|
||||
|
||||
/// Returns the region routes allocator.
|
||||
pub fn region_routes_allocator(&self) -> RegionRoutesAllocatorRef {
|
||||
self.region_routes_allocator.clone()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,7 +128,6 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo {
|
||||
value_indices: vec![],
|
||||
engine: expr.engine.clone(),
|
||||
next_column_id: expr.column_defs.len() as u32,
|
||||
region_numbers: vec![],
|
||||
options: TableOptions::try_from_iter(&expr.table_options).unwrap(),
|
||||
created_on: DateTime::default(),
|
||||
updated_on: DateTime::default(),
|
||||
|
||||
@@ -166,7 +166,7 @@ async fn test_on_prepare_logical_table_exists_err() {
|
||||
.table_metadata_manager
|
||||
.create_logical_tables_metadata(vec![(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::logical(1024, vec![RegionId::new(1025, 1)]),
|
||||
TableRouteValue::logical(1024),
|
||||
)])
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -208,7 +208,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
|
||||
.table_metadata_manager
|
||||
.create_logical_tables_metadata(vec![(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::logical(1024, vec![RegionId::new(8192, 1)]),
|
||||
TableRouteValue::logical(1024),
|
||||
)])
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -252,7 +252,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
|
||||
.table_metadata_manager
|
||||
.create_logical_tables_metadata(vec![(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::logical(1024, vec![RegionId::new(8192, 1)]),
|
||||
TableRouteValue::logical(1024),
|
||||
)])
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -392,7 +392,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
|
||||
.table_metadata_manager
|
||||
.create_logical_tables_metadata(vec![(
|
||||
task.table_info.clone(),
|
||||
TableRouteValue::logical(1024, vec![RegionId::new(8192, 1)]),
|
||||
TableRouteValue::logical(1024),
|
||||
)])
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -496,10 +496,7 @@ async fn test_on_create_metadata_err() {
|
||||
task.table_info.ident.table_id = 1025;
|
||||
ddl_context
|
||||
.table_metadata_manager
|
||||
.create_logical_tables_metadata(vec![(
|
||||
task.table_info,
|
||||
TableRouteValue::logical(512, vec![RegionId::new(1026, 1)]),
|
||||
)])
|
||||
.create_logical_tables_metadata(vec![(task.table_info, TableRouteValue::logical(512))])
|
||||
.await
|
||||
.unwrap();
|
||||
// Triggers procedure to create table metadata
|
||||
|
||||
@@ -162,7 +162,7 @@ async fn test_on_prepare_table_exists_err() {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context).unwrap();
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_matches!(err, Error::TableAlreadyExists { .. });
|
||||
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
|
||||
@@ -185,7 +185,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context).unwrap();
|
||||
let status = procedure.on_prepare().await.unwrap();
|
||||
assert_matches!(status, Status::Done { output: Some(..) });
|
||||
let table_id = *status.downcast_output_ref::<u32>().unwrap();
|
||||
@@ -198,7 +198,7 @@ async fn test_on_prepare_without_create_if_table_exists() {
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let mut task = test_create_table_task("foo");
|
||||
task.create_table.create_if_not_exists = true;
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context).unwrap();
|
||||
let status = procedure.on_prepare().await.unwrap();
|
||||
assert_matches!(
|
||||
status,
|
||||
@@ -217,7 +217,7 @@ async fn test_on_datanode_create_regions_should_retry() {
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let task = test_create_table_task("foo");
|
||||
assert!(!task.create_table.create_if_not_exists);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
@@ -234,7 +234,7 @@ async fn test_on_datanode_create_regions_should_not_retry() {
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let task = test_create_table_task("foo");
|
||||
assert!(!task.create_table.create_if_not_exists);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
@@ -251,7 +251,7 @@ async fn test_on_create_metadata_error() {
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let task = test_create_table_task("foo");
|
||||
assert!(!task.create_table.create_if_not_exists);
|
||||
let mut procedure = CreateTableProcedure::new(task.clone(), ddl_context.clone());
|
||||
let mut procedure = CreateTableProcedure::new(task.clone(), ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
@@ -284,7 +284,7 @@ async fn test_on_create_metadata() {
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let task = test_create_table_task("foo");
|
||||
assert!(!task.create_table.create_if_not_exists);
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
@@ -312,16 +312,16 @@ async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
|
||||
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
|
||||
|
||||
let task = test_create_table_task("foo");
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone()).unwrap();
|
||||
|
||||
execute_procedure_until(&mut procedure, |p| {
|
||||
p.creator.data.state == CreateTableState::CreateMetadata
|
||||
p.data.state == CreateTableState::CreateMetadata
|
||||
})
|
||||
.await;
|
||||
|
||||
// Ensure that after running to the state `CreateMetadata`(just past `DatanodeCreateRegions`),
|
||||
// the opening regions should be recorded:
|
||||
let guards = &procedure.creator.opening_regions;
|
||||
let guards = &procedure.opening_regions;
|
||||
assert_eq!(guards.len(), 1);
|
||||
let (datanode_id, region_id) = (0, RegionId::new(procedure.table_id(), 0));
|
||||
assert_eq!(guards[0].info(), (datanode_id, region_id));
|
||||
@@ -334,7 +334,7 @@ async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
|
||||
execute_procedure_until_done(&mut procedure).await;
|
||||
|
||||
// Ensure that when run to the end, the opening regions should be cleared:
|
||||
let guards = &procedure.creator.opening_regions;
|
||||
let guards = &procedure.opening_regions;
|
||||
assert!(guards.is_empty());
|
||||
assert!(
|
||||
!ddl_context
|
||||
|
||||
@@ -259,7 +259,7 @@ async fn test_replace_table() {
|
||||
{
|
||||
// Create a `foo` table.
|
||||
let task = test_create_table_task("foo");
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone()).unwrap();
|
||||
procedure.on_prepare().await.unwrap();
|
||||
let ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
|
||||
@@ -14,15 +14,19 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::Repartition;
|
||||
use api::v1::alter_table_expr::Kind;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::{
|
||||
BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId, watcher,
|
||||
BoxedProcedure, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef,
|
||||
ProcedureWithId, watcher,
|
||||
};
|
||||
use common_telemetry::tracing_context::{FutureExt, TracingContext};
|
||||
use common_telemetry::{debug, info, tracing};
|
||||
use derive_builder::Builder;
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use store_api::storage::TableId;
|
||||
use table::table_name::TableName;
|
||||
|
||||
use crate::ddl::alter_database::AlterDatabaseProcedure;
|
||||
use crate::ddl::alter_logical_tables::AlterLogicalTablesProcedure;
|
||||
@@ -40,7 +44,8 @@ use crate::ddl::drop_view::DropViewProcedure;
|
||||
use crate::ddl::truncate_table::TruncateTableProcedure;
|
||||
use crate::ddl::{DdlContext, utils};
|
||||
use crate::error::{
|
||||
EmptyDdlTasksSnafu, ProcedureOutputSnafu, RegisterProcedureLoaderSnafu, Result,
|
||||
CreateRepartitionProcedureSnafu, EmptyDdlTasksSnafu, ProcedureOutputSnafu,
|
||||
RegisterProcedureLoaderSnafu, RegisterRepartitionProcedureLoaderSnafu, Result,
|
||||
SubmitProcedureSnafu, TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
|
||||
UnexpectedLogicalRouteTableSnafu, WaitProcedureSnafu,
|
||||
};
|
||||
@@ -90,6 +95,7 @@ pub type BoxedProcedureLoaderFactory = dyn Fn(DdlContext) -> BoxedProcedureLoade
|
||||
pub struct DdlManager {
|
||||
ddl_context: DdlContext,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
repartition_procedure_factory: RepartitionProcedureFactoryRef,
|
||||
#[cfg(feature = "enterprise")]
|
||||
trigger_ddl_manager: Option<TriggerDdlManagerRef>,
|
||||
}
|
||||
@@ -143,16 +149,37 @@ macro_rules! procedure_loader {
|
||||
};
|
||||
}
|
||||
|
||||
pub type RepartitionProcedureFactoryRef = Arc<dyn RepartitionProcedureFactory>;
|
||||
|
||||
pub trait RepartitionProcedureFactory: Send + Sync {
|
||||
fn create(
|
||||
&self,
|
||||
ddl_ctx: &DdlContext,
|
||||
table_name: TableName,
|
||||
table_id: TableId,
|
||||
from_exprs: Vec<String>,
|
||||
to_exprs: Vec<String>,
|
||||
) -> std::result::Result<BoxedProcedure, BoxedError>;
|
||||
|
||||
fn register_loaders(
|
||||
&self,
|
||||
ddl_ctx: &DdlContext,
|
||||
procedure_manager: &ProcedureManagerRef,
|
||||
) -> std::result::Result<(), BoxedError>;
|
||||
}
|
||||
|
||||
impl DdlManager {
|
||||
/// Returns a new [DdlManager] with all Ddl [BoxedProcedureLoader](common_procedure::procedure::BoxedProcedureLoader)s registered.
|
||||
pub fn try_new(
|
||||
ddl_context: DdlContext,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
repartition_procedure_factory: RepartitionProcedureFactoryRef,
|
||||
register_loaders: bool,
|
||||
) -> Result<Self> {
|
||||
let manager = Self {
|
||||
ddl_context,
|
||||
procedure_manager,
|
||||
repartition_procedure_factory,
|
||||
#[cfg(feature = "enterprise")]
|
||||
trigger_ddl_manager: None,
|
||||
};
|
||||
@@ -204,9 +231,63 @@ impl DdlManager {
|
||||
.context(RegisterProcedureLoaderSnafu { type_name })?;
|
||||
}
|
||||
|
||||
self.repartition_procedure_factory
|
||||
.register_loaders(&self.ddl_context, &self.procedure_manager)
|
||||
.context(RegisterRepartitionProcedureLoaderSnafu)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Submits a repartition procedure for the specified table.
|
||||
///
|
||||
/// This creates a repartition procedure using the provided `table_id`,
|
||||
/// `table_name`, and `Repartition` configuration, and then either executes it
|
||||
/// to completion or just submits it for asynchronous execution.
|
||||
///
|
||||
/// The `Repartition` argument contains the original (`from_partition_exprs`)
|
||||
/// and target (`into_partition_exprs`) partition expressions that define how
|
||||
/// the table should be repartitioned.
|
||||
///
|
||||
/// The `wait` flag controls whether this method waits for the repartition
|
||||
/// procedure to finish:
|
||||
/// - If `wait` is `true`, the procedure is executed and this method awaits
|
||||
/// its completion, returning both the generated `ProcedureId` and the
|
||||
/// final `Output` of the procedure.
|
||||
/// - If `wait` is `false`, the procedure is only submitted to the procedure
|
||||
/// manager for asynchronous execution, and this method returns the
|
||||
/// `ProcedureId` along with `None` as the output.
|
||||
async fn submit_repartition_task(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: TableName,
|
||||
Repartition {
|
||||
from_partition_exprs,
|
||||
into_partition_exprs,
|
||||
wait,
|
||||
}: Repartition,
|
||||
) -> Result<(ProcedureId, Option<Output>)> {
|
||||
let context = self.create_context();
|
||||
|
||||
let procedure = self
|
||||
.repartition_procedure_factory
|
||||
.create(
|
||||
&context,
|
||||
table_name,
|
||||
table_id,
|
||||
from_partition_exprs,
|
||||
into_partition_exprs,
|
||||
)
|
||||
.context(CreateRepartitionProcedureSnafu)?;
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
if wait {
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
} else {
|
||||
self.submit_procedure(procedure_with_id)
|
||||
.await
|
||||
.map(|p| (p, None))
|
||||
}
|
||||
}
|
||||
|
||||
/// Submits and executes an alter table task.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn submit_alter_table_task(
|
||||
@@ -214,13 +295,28 @@ impl DdlManager {
|
||||
table_id: TableId,
|
||||
alter_table_task: AlterTableTask,
|
||||
) -> Result<(ProcedureId, Option<Output>)> {
|
||||
let context = self.create_context();
|
||||
// make alter_table_task mutable so we can call .take() on its field
|
||||
let mut alter_table_task = alter_table_task;
|
||||
if let Some(Kind::Repartition(_)) = alter_table_task.alter_table.kind.as_ref()
|
||||
&& let Kind::Repartition(repartition) =
|
||||
alter_table_task.alter_table.kind.take().unwrap()
|
||||
{
|
||||
let table_name = TableName::new(
|
||||
alter_table_task.alter_table.catalog_name,
|
||||
alter_table_task.alter_table.schema_name,
|
||||
alter_table_task.alter_table.table_name,
|
||||
);
|
||||
return self
|
||||
.submit_repartition_task(table_id, table_name, repartition)
|
||||
.await;
|
||||
}
|
||||
|
||||
let context = self.create_context();
|
||||
let procedure = AlterTableProcedure::new(table_id, alter_table_task, context)?;
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a create table task.
|
||||
@@ -231,11 +327,11 @@ impl DdlManager {
|
||||
) -> Result<(ProcedureId, Option<Output>)> {
|
||||
let context = self.create_context();
|
||||
|
||||
let procedure = CreateTableProcedure::new(create_table_task, context);
|
||||
let procedure = CreateTableProcedure::new(create_table_task, context)?;
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a `[CreateViewTask]`.
|
||||
@@ -250,7 +346,7 @@ impl DdlManager {
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a create multiple logical table tasks.
|
||||
@@ -267,7 +363,7 @@ impl DdlManager {
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes alter multiple table tasks.
|
||||
@@ -284,7 +380,7 @@ impl DdlManager {
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a drop table task.
|
||||
@@ -299,7 +395,7 @@ impl DdlManager {
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a create database task.
|
||||
@@ -318,7 +414,7 @@ impl DdlManager {
|
||||
CreateDatabaseProcedure::new(catalog, schema, create_if_not_exists, options, context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a drop table task.
|
||||
@@ -335,7 +431,7 @@ impl DdlManager {
|
||||
let procedure = DropDatabaseProcedure::new(catalog, schema, drop_if_exists, context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
pub async fn submit_alter_database(
|
||||
@@ -346,7 +442,7 @@ impl DdlManager {
|
||||
let procedure = AlterDatabaseProcedure::new(alter_database_task, context)?;
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a create flow task.
|
||||
@@ -360,7 +456,7 @@ impl DdlManager {
|
||||
let procedure = CreateFlowProcedure::new(create_flow, query_context, context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a drop flow task.
|
||||
@@ -373,7 +469,7 @@ impl DdlManager {
|
||||
let procedure = DropFlowProcedure::new(drop_flow, context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a drop view task.
|
||||
@@ -386,7 +482,7 @@ impl DdlManager {
|
||||
let procedure = DropViewProcedure::new(drop_view, context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a truncate table task.
|
||||
@@ -407,7 +503,7 @@ impl DdlManager {
|
||||
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
/// Submits and executes a comment on task.
|
||||
@@ -420,10 +516,11 @@ impl DdlManager {
|
||||
let procedure = CommentOnProcedure::new(comment_on_task, context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
|
||||
self.submit_procedure(procedure_with_id).await
|
||||
self.execute_procedure_and_wait(procedure_with_id).await
|
||||
}
|
||||
|
||||
async fn submit_procedure(
|
||||
/// Executes a procedure and waits for the result.
|
||||
async fn execute_procedure_and_wait(
|
||||
&self,
|
||||
procedure_with_id: ProcedureWithId,
|
||||
) -> Result<(ProcedureId, Option<Output>)> {
|
||||
@@ -442,6 +539,18 @@ impl DdlManager {
|
||||
Ok((procedure_id, output))
|
||||
}
|
||||
|
||||
/// Submits a procedure and returns the procedure id.
|
||||
async fn submit_procedure(&self, procedure_with_id: ProcedureWithId) -> Result<ProcedureId> {
|
||||
let procedure_id = procedure_with_id.id;
|
||||
let _ = self
|
||||
.procedure_manager
|
||||
.submit(procedure_with_id)
|
||||
.await
|
||||
.context(SubmitProcedureSnafu)?;
|
||||
|
||||
Ok(procedure_id)
|
||||
}
|
||||
|
||||
pub async fn submit_ddl_task(
|
||||
&self,
|
||||
ctx: &ExecutorContext,
|
||||
@@ -947,8 +1056,12 @@ async fn handle_comment_on_task(
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_procedure::local::LocalManager;
|
||||
use common_procedure::test_util::InMemoryPoisonStore;
|
||||
use common_procedure::{BoxedProcedure, ProcedureManagerRef};
|
||||
use store_api::storage::TableId;
|
||||
use table::table_name::TableName;
|
||||
|
||||
use super::DdlManager;
|
||||
use crate::cache_invalidator::DummyCacheInvalidator;
|
||||
@@ -959,6 +1072,7 @@ mod tests {
|
||||
use crate::ddl::table_meta::TableMetadataAllocator;
|
||||
use crate::ddl::truncate_table::TruncateTableProcedure;
|
||||
use crate::ddl::{DdlContext, NoopRegionFailureDetectorControl};
|
||||
use crate::ddl_manager::RepartitionProcedureFactory;
|
||||
use crate::key::TableMetadataManager;
|
||||
use crate::key::flow::FlowMetadataManager;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
@@ -968,7 +1082,7 @@ mod tests {
|
||||
use crate::region_registry::LeaderRegionRegistry;
|
||||
use crate::sequence::SequenceBuilder;
|
||||
use crate::state_store::KvStateStore;
|
||||
use crate::wal_options_allocator::WalOptionsAllocator;
|
||||
use crate::wal_provider::WalProvider;
|
||||
|
||||
/// A dummy implemented [NodeManager].
|
||||
pub struct DummyDatanodeManager;
|
||||
@@ -987,13 +1101,37 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
struct DummyRepartitionProcedureFactory;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl RepartitionProcedureFactory for DummyRepartitionProcedureFactory {
|
||||
fn create(
|
||||
&self,
|
||||
_ddl_ctx: &DdlContext,
|
||||
_table_name: TableName,
|
||||
_table_id: TableId,
|
||||
_from_exprs: Vec<String>,
|
||||
_to_exprs: Vec<String>,
|
||||
) -> std::result::Result<BoxedProcedure, BoxedError> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn register_loaders(
|
||||
&self,
|
||||
_ddl_ctx: &DdlContext,
|
||||
_procedure_manager: &ProcedureManagerRef,
|
||||
) -> std::result::Result<(), BoxedError> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_new() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
|
||||
let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
|
||||
Arc::new(SequenceBuilder::new("test", kv_backend.clone()).build()),
|
||||
Arc::new(WalOptionsAllocator::default()),
|
||||
Arc::new(WalProvider::default()),
|
||||
));
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
|
||||
let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
|
||||
@@ -1023,6 +1161,7 @@ mod tests {
|
||||
region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
|
||||
},
|
||||
procedure_manager.clone(),
|
||||
Arc::new(DummyRepartitionProcedureFactory),
|
||||
true,
|
||||
);
|
||||
|
||||
|
||||
@@ -104,6 +104,20 @@ pub enum Error {
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to register repartition procedure loader"))]
|
||||
RegisterRepartitionProcedureLoader {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to create repartition procedure"))]
|
||||
CreateRepartitionProcedure {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to submit procedure"))]
|
||||
SubmitProcedure {
|
||||
#[snafu(implicit)]
|
||||
@@ -1170,6 +1184,8 @@ impl ErrorExt for Error {
|
||||
PutPoison { source, .. } => source.status_code(),
|
||||
ConvertColumnDef { source, .. } => source.status_code(),
|
||||
ProcedureStateReceiver { source, .. } => source.status_code(),
|
||||
RegisterRepartitionProcedureLoader { source, .. } => source.status_code(),
|
||||
CreateRepartitionProcedure { source, .. } => source.status_code(),
|
||||
|
||||
ParseProcedureId { .. }
|
||||
| InvalidNumTopics { .. }
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -432,11 +432,11 @@ where
|
||||
pub struct GetFileRefs {
|
||||
/// List of region IDs to get file references from active FileHandles (in-memory).
|
||||
pub query_regions: Vec<RegionId>,
|
||||
/// Mapping from the source region ID (where to read the manifest) to
|
||||
/// the target region IDs (whose file references to look for).
|
||||
/// Key: The region ID of the manifest.
|
||||
/// Value: The list of region IDs to find references for in that manifest.
|
||||
pub related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
/// Mapping from the src region IDs (whose file references to look for) to
|
||||
/// the dst region IDs (where to read the manifests).
|
||||
/// Key: The source region IDs (where files originally came from).
|
||||
/// Value: The set of destination region IDs (whose manifests need to be read).
|
||||
pub related_regions: HashMap<RegionId, HashSet<RegionId>>,
|
||||
}
|
||||
|
||||
impl Display for GetFileRefs {
|
||||
|
||||
@@ -747,12 +747,10 @@ impl TableMetadataManager {
|
||||
/// The caller MUST ensure it has the exclusive access to `TableNameKey`.
|
||||
pub async fn create_table_metadata(
|
||||
&self,
|
||||
mut table_info: RawTableInfo,
|
||||
table_info: RawTableInfo,
|
||||
table_route_value: TableRouteValue,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) -> Result<()> {
|
||||
let region_numbers = table_route_value.region_numbers();
|
||||
table_info.meta.region_numbers = region_numbers;
|
||||
let table_id = table_info.ident.table_id;
|
||||
let engine = table_info.meta.engine.clone();
|
||||
|
||||
@@ -851,8 +849,7 @@ impl TableMetadataManager {
|
||||
on_create_table_route_failure: F2,
|
||||
}
|
||||
let mut on_failures = Vec::with_capacity(len);
|
||||
for (mut table_info, table_route_value) in tables_data {
|
||||
table_info.meta.region_numbers = table_route_value.region_numbers();
|
||||
for (table_info, table_route_value) in tables_data {
|
||||
let table_id = table_info.ident.table_id;
|
||||
|
||||
// Creates table name.
|
||||
@@ -1477,6 +1474,7 @@ mod tests {
|
||||
|
||||
use super::datanode_table::DatanodeTableKey;
|
||||
use super::test_utils;
|
||||
use crate::ddl::allocator::wal_options::WalOptionsAllocator;
|
||||
use crate::ddl::test_util::create_table::test_create_table_task;
|
||||
use crate::ddl::utils::region_storage_path;
|
||||
use crate::error::Result;
|
||||
@@ -1493,7 +1491,7 @@ mod tests {
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::router::{LeaderState, Region, RegionRoute, region_distribution};
|
||||
use crate::rpc::store::RangeRequest;
|
||||
use crate::wal_options_allocator::{WalOptionsAllocator, allocate_region_wal_options};
|
||||
use crate::wal_provider::WalProvider;
|
||||
|
||||
#[test]
|
||||
fn test_deserialized_value_with_bytes() {
|
||||
@@ -1543,8 +1541,8 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn new_test_table_info(region_numbers: impl Iterator<Item = u32>) -> TableInfo {
|
||||
test_utils::new_test_table_info(10, region_numbers)
|
||||
fn new_test_table_info() -> TableInfo {
|
||||
test_utils::new_test_table_info(10)
|
||||
}
|
||||
|
||||
fn new_test_table_names() -> HashSet<TableName> {
|
||||
@@ -1602,12 +1600,10 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv.clone());
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let wal_allocator = WalOptionsAllocator::RaftEngine;
|
||||
let regions = (0..16).collect();
|
||||
let region_wal_options =
|
||||
allocate_region_wal_options(regions, &wal_allocator, false).unwrap();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let wal_provider = WalProvider::RaftEngine;
|
||||
let regions: Vec<_> = (0..16).collect();
|
||||
let region_wal_options = wal_provider.allocate(®ions, false).await.unwrap();
|
||||
create_physical_table_metadata(
|
||||
&table_metadata_manager,
|
||||
table_info.clone(),
|
||||
@@ -1630,8 +1626,7 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let region_wal_options = create_mock_region_wal_options()
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k, serde_json::to_string(&v).unwrap()))
|
||||
@@ -1713,8 +1708,7 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
let table_route_value = TableRouteValue::physical(region_routes.clone());
|
||||
|
||||
@@ -1779,7 +1773,6 @@ mod tests {
|
||||
let table_info: RawTableInfo = test_utils::new_test_table_info_with_name(
|
||||
table_id,
|
||||
&format!("my_table_{}", table_id),
|
||||
region_routes.iter().map(|r| r.region.id.region_number()),
|
||||
)
|
||||
.into();
|
||||
let table_route_value = TableRouteValue::physical(region_routes.clone());
|
||||
@@ -1800,8 +1793,7 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = &vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
let datanode_id = 2;
|
||||
let region_wal_options = create_mock_region_wal_options();
|
||||
@@ -1907,8 +1899,7 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
// creates metadata.
|
||||
create_physical_table_metadata(
|
||||
@@ -1984,8 +1975,7 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
// creates metadata.
|
||||
create_physical_table_metadata(
|
||||
@@ -2070,8 +2060,7 @@ mod tests {
|
||||
leader_down_since: None,
|
||||
},
|
||||
];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
let current_table_route_value = DeserializedValueWithBytes::from_inner(
|
||||
TableRouteValue::physical(region_routes.clone()),
|
||||
@@ -2153,8 +2142,7 @@ mod tests {
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
let region_route = new_test_region_route();
|
||||
let region_routes = vec![region_route.clone()];
|
||||
let table_info: RawTableInfo =
|
||||
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
|
||||
let table_info: RawTableInfo = new_test_table_info().into();
|
||||
let table_id = table_info.ident.table_id;
|
||||
let engine = table_info.meta.engine.as_str();
|
||||
let region_storage_path =
|
||||
@@ -2408,7 +2396,7 @@ mod tests {
|
||||
let mem_kv = Arc::new(MemoryKvBackend::default());
|
||||
let table_metadata_manager = TableMetadataManager::new(mem_kv);
|
||||
|
||||
let view_info: RawTableInfo = new_test_table_info(Vec::<u32>::new().into_iter()).into();
|
||||
let view_info: RawTableInfo = new_test_table_info().into();
|
||||
|
||||
let view_id = view_info.ident.table_id;
|
||||
|
||||
|
||||
@@ -338,7 +338,6 @@ mod tests {
|
||||
next_column_id: 3,
|
||||
value_indices: vec![2, 3],
|
||||
options: Default::default(),
|
||||
region_numbers: vec![1],
|
||||
partition_key_indices: vec![],
|
||||
column_ids: vec![],
|
||||
};
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
@@ -62,16 +62,54 @@ pub enum TableRouteValue {
|
||||
Logical(LogicalTableRouteValue),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Default)]
|
||||
#[derive(Debug, PartialEq, Serialize, Clone, Default)]
|
||||
pub struct PhysicalTableRouteValue {
|
||||
// The region routes of the table.
|
||||
pub region_routes: Vec<RegionRoute>,
|
||||
// Tracks the highest region number ever allocated for the table.
|
||||
// This value only increases: adding a region updates it if needed,
|
||||
// and dropping regions does not decrease it.
|
||||
pub max_region_number: RegionNumber,
|
||||
// The version of the table route.
|
||||
version: u64,
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for PhysicalTableRouteValue {
|
||||
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
#[derive(Deserialize)]
|
||||
struct Helper {
|
||||
region_routes: Vec<RegionRoute>,
|
||||
#[serde(default)]
|
||||
max_region_number: Option<RegionNumber>,
|
||||
version: u64,
|
||||
}
|
||||
|
||||
let mut helper = Helper::deserialize(deserializer)?;
|
||||
// If the max region number is not provided, we will calculate it from the region routes.
|
||||
if helper.max_region_number.is_none() {
|
||||
let max_region = helper
|
||||
.region_routes
|
||||
.iter()
|
||||
.map(|r| r.region.id.region_number())
|
||||
.max()
|
||||
.unwrap_or_default();
|
||||
helper.max_region_number = Some(max_region);
|
||||
}
|
||||
|
||||
Ok(PhysicalTableRouteValue {
|
||||
region_routes: helper.region_routes,
|
||||
max_region_number: helper.max_region_number.unwrap_or_default(),
|
||||
version: helper.version,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
|
||||
pub struct LogicalTableRouteValue {
|
||||
physical_table_id: TableId,
|
||||
region_ids: Vec<RegionId>,
|
||||
}
|
||||
|
||||
impl TableRouteValue {
|
||||
@@ -85,14 +123,7 @@ impl TableRouteValue {
|
||||
if table_id == physical_table_id {
|
||||
TableRouteValue::physical(region_routes)
|
||||
} else {
|
||||
let region_routes = region_routes
|
||||
.into_iter()
|
||||
.map(|region| {
|
||||
debug_assert_eq!(region.region.id.table_id(), physical_table_id);
|
||||
RegionId::new(table_id, region.region.id.region_number())
|
||||
})
|
||||
.collect();
|
||||
TableRouteValue::logical(physical_table_id, region_routes)
|
||||
TableRouteValue::logical(physical_table_id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,8 +131,8 @@ impl TableRouteValue {
|
||||
Self::Physical(PhysicalTableRouteValue::new(region_routes))
|
||||
}
|
||||
|
||||
pub fn logical(physical_table_id: TableId, region_ids: Vec<RegionId>) -> Self {
|
||||
Self::Logical(LogicalTableRouteValue::new(physical_table_id, region_ids))
|
||||
pub fn logical(physical_table_id: TableId) -> Self {
|
||||
Self::Logical(LogicalTableRouteValue::new(physical_table_id))
|
||||
}
|
||||
|
||||
/// Returns a new version [TableRouteValue] with `region_routes`.
|
||||
@@ -112,9 +143,19 @@ impl TableRouteValue {
|
||||
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
|
||||
}
|
||||
);
|
||||
let version = self.as_physical_table_route_ref().version;
|
||||
let physical_table_route = self.as_physical_table_route_ref();
|
||||
let original_max_region_number = physical_table_route.max_region_number;
|
||||
let new_max_region_number = region_routes
|
||||
.iter()
|
||||
.map(|r| r.region.id.region_number())
|
||||
.max()
|
||||
.unwrap_or_default();
|
||||
let version = physical_table_route.version;
|
||||
Ok(Self::Physical(PhysicalTableRouteValue {
|
||||
region_routes,
|
||||
// If region routes are added, we will update the max region number.
|
||||
// If region routes are removed, we will keep the original max region number.
|
||||
max_region_number: original_max_region_number.max(new_max_region_number),
|
||||
version: version + 1,
|
||||
}))
|
||||
}
|
||||
@@ -167,6 +208,20 @@ impl TableRouteValue {
|
||||
Ok(&self.as_physical_table_route_ref().region_routes)
|
||||
}
|
||||
|
||||
/// Returns the max region number of this [TableRouteValue::Physical].
|
||||
///
|
||||
/// # Panic
|
||||
/// If it is not the [`PhysicalTableRouteValue`].
|
||||
pub fn max_region_number(&self) -> Result<RegionNumber> {
|
||||
ensure!(
|
||||
self.is_physical(),
|
||||
UnexpectedLogicalRouteTableSnafu {
|
||||
err_msg: format!("{self:?} is a non-physical TableRouteValue."),
|
||||
}
|
||||
);
|
||||
Ok(self.as_physical_table_route_ref().max_region_number)
|
||||
}
|
||||
|
||||
/// Returns the reference of [`PhysicalTableRouteValue`].
|
||||
///
|
||||
/// # Panic
|
||||
@@ -207,11 +262,9 @@ impl TableRouteValue {
|
||||
.iter()
|
||||
.map(|region_route| region_route.region.id.region_number())
|
||||
.collect(),
|
||||
TableRouteValue::Logical(x) => x
|
||||
.region_ids()
|
||||
.iter()
|
||||
.map(|region_id| region_id.region_number())
|
||||
.collect(),
|
||||
TableRouteValue::Logical(_) => {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -237,28 +290,27 @@ impl MetadataValue for TableRouteValue {
|
||||
|
||||
impl PhysicalTableRouteValue {
|
||||
pub fn new(region_routes: Vec<RegionRoute>) -> Self {
|
||||
let max_region_number = region_routes
|
||||
.iter()
|
||||
.map(|r| r.region.id.region_number())
|
||||
.max()
|
||||
.unwrap_or_default();
|
||||
Self {
|
||||
region_routes,
|
||||
max_region_number,
|
||||
version: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LogicalTableRouteValue {
|
||||
pub fn new(physical_table_id: TableId, region_ids: Vec<RegionId>) -> Self {
|
||||
Self {
|
||||
physical_table_id,
|
||||
region_ids,
|
||||
}
|
||||
pub fn new(physical_table_id: TableId) -> Self {
|
||||
Self { physical_table_id }
|
||||
}
|
||||
|
||||
pub fn physical_table_id(&self) -> TableId {
|
||||
self.physical_table_id
|
||||
}
|
||||
|
||||
pub fn region_ids(&self) -> &Vec<RegionId> {
|
||||
&self.region_ids
|
||||
}
|
||||
}
|
||||
|
||||
impl MetadataKey<'_, TableRouteKey> for TableRouteKey {
|
||||
@@ -823,6 +875,57 @@ mod tests {
|
||||
use crate::rpc::router::Region;
|
||||
use crate::rpc::store::PutRequest;
|
||||
|
||||
/// Checks that `max_region_number` never decreases across `update` calls and grows
/// when a route with a larger region number is supplied.
#[test]
fn test_update_table_route_max_region_number() {
    // Builds a route for region `number` of table 0; every other field is defaulted.
    let route = |number| RegionRoute {
        region: Region {
            id: RegionId::new(0, number),
            ..Default::default()
        },
        ..Default::default()
    };

    let initial = PhysicalTableRouteValue::new(vec![route(1), route(2)]);
    assert_eq!(initial.max_region_number, 2);

    // Shouldn't change the max region number: only region 1 remains.
    let shrunk = TableRouteValue::Physical(initial)
        .update(vec![route(1)])
        .unwrap();
    assert_eq!(shrunk.as_physical_table_route_ref().max_region_number, 2);

    // Should increase the max region number: region 3 exceeds the recorded maximum.
    let grown = shrunk
        .update(vec![route(3)])
        .unwrap()
        .into_physical_table_route();
    assert_eq!(grown.max_region_number, 3);
}
|
||||
|
||||
#[test]
|
||||
fn test_table_route_compatibility() {
|
||||
let old_raw_v = r#"{"region_routes":[{"region":{"id":1,"name":"r1","partition":null,"attrs":{}},"leader_peer":{"id":2,"addr":"a2"},"follower_peers":[]},{"region":{"id":1,"name":"r1","partition":null,"attrs":{}},"leader_peer":{"id":2,"addr":"a2"},"follower_peers":[]}],"version":0}"#;
|
||||
@@ -863,6 +966,7 @@ mod tests {
|
||||
leader_down_since: None,
|
||||
},
|
||||
],
|
||||
max_region_number: 1,
|
||||
version: 0,
|
||||
});
|
||||
|
||||
@@ -900,7 +1004,6 @@ mod tests {
|
||||
let table_route_manager = TableRouteManager::new(kv.clone());
|
||||
let table_route_value = TableRouteValue::Logical(LogicalTableRouteValue {
|
||||
physical_table_id: 1023,
|
||||
region_ids: vec![RegionId::new(1023, 1)],
|
||||
});
|
||||
let (txn, _) = table_route_manager
|
||||
.table_route_storage()
|
||||
@@ -930,14 +1033,12 @@ mod tests {
|
||||
1024,
|
||||
TableRouteValue::Logical(LogicalTableRouteValue {
|
||||
physical_table_id: 1023,
|
||||
region_ids: vec![RegionId::new(1023, 1)],
|
||||
}),
|
||||
),
|
||||
(
|
||||
1025,
|
||||
TableRouteValue::Logical(LogicalTableRouteValue {
|
||||
physical_table_id: 1023,
|
||||
region_ids: vec![RegionId::new(1023, 2)],
|
||||
}),
|
||||
),
|
||||
];
|
||||
@@ -976,6 +1077,7 @@ mod tests {
|
||||
}],
|
||||
..Default::default()
|
||||
}],
|
||||
max_region_number: 0,
|
||||
version: 0,
|
||||
});
|
||||
|
||||
|
||||
@@ -19,11 +19,7 @@ use datatypes::schema::{ColumnSchema, SchemaBuilder};
|
||||
use store_api::storage::TableId;
|
||||
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder};
|
||||
|
||||
pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
table_id: TableId,
|
||||
table_name: &str,
|
||||
region_numbers: I,
|
||||
) -> TableInfo {
|
||||
pub fn new_test_table_info_with_name(table_id: TableId, table_name: &str) -> TableInfo {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
@@ -45,7 +41,6 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
.primary_key_indices(vec![0])
|
||||
.engine("engine")
|
||||
.next_column_id(3)
|
||||
.region_numbers(region_numbers.into_iter().collect::<Vec<_>>())
|
||||
.build()
|
||||
.unwrap();
|
||||
TableInfoBuilder::default()
|
||||
@@ -56,9 +51,6 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
|
||||
table_id: TableId,
|
||||
region_numbers: I,
|
||||
) -> TableInfo {
|
||||
new_test_table_info_with_name(table_id, "mytable", region_numbers)
|
||||
pub fn new_test_table_info(table_id: TableId) -> TableInfo {
|
||||
new_test_table_info_with_name(table_id, "mytable")
|
||||
}
|
||||
|
||||
@@ -613,7 +613,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_put() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("put_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("put-test").await.unwrap();
|
||||
let prefix = b"put/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -623,7 +623,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_range() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("range_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("range-test").await.unwrap();
|
||||
let prefix = b"range/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -633,7 +633,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_range_2() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("range2_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("range2-test").await.unwrap();
|
||||
let prefix = b"range2/";
|
||||
test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
@@ -642,7 +642,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_all_range() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("simple_range_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("simple_range-test").await.unwrap();
|
||||
let prefix = b"";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_simple_kv_range(&kv_backend).await;
|
||||
@@ -652,7 +652,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_batch_get() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("batch_get_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("batch_get-test").await.unwrap();
|
||||
let prefix = b"batch_get/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -662,7 +662,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_batch_delete() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("batch_delete_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("batch_delete-test").await.unwrap();
|
||||
let prefix = b"batch_delete/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -672,7 +672,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_batch_delete_with_prefix() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("batch_delete_with_prefix_test")
|
||||
let kv_backend = build_mysql_kv_backend("batch_delete_with_prefix-test")
|
||||
.await
|
||||
.unwrap();
|
||||
let prefix = b"batch_delete/";
|
||||
@@ -684,7 +684,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_delete_range() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("delete_range_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("delete_range-test").await.unwrap();
|
||||
let prefix = b"delete_range/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -694,7 +694,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_compare_and_put() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("compare_and_put_test")
|
||||
let kv_backend = build_mysql_kv_backend("compare_and_put-test")
|
||||
.await
|
||||
.unwrap();
|
||||
let prefix = b"compare_and_put/";
|
||||
@@ -705,7 +705,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_mysql_txn() {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let kv_backend = build_mysql_kv_backend("txn_test").await.unwrap();
|
||||
let kv_backend = build_mysql_kv_backend("txn-test").await.unwrap();
|
||||
test_txn_one_compare_op(&kv_backend).await;
|
||||
text_txn_multi_compare_op(&kv_backend).await;
|
||||
test_txn_compare_equal(&kv_backend).await;
|
||||
|
||||
@@ -1105,7 +1105,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_put() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("put_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("put-test").await.unwrap();
|
||||
let prefix = b"put/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -1115,7 +1115,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_range() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("range_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("range-test").await.unwrap();
|
||||
let prefix = b"range/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -1125,7 +1125,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_range_2() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("range2_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("range2-test").await.unwrap();
|
||||
let prefix = b"range2/";
|
||||
test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
unprepare_kv(&kv_backend, prefix).await;
|
||||
@@ -1134,7 +1134,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_all_range() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("simple_range_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("simple_range-test").await.unwrap();
|
||||
let prefix = b"";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_simple_kv_range(&kv_backend).await;
|
||||
@@ -1144,7 +1144,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_batch_get() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("batch_get_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("batch_get-test").await.unwrap();
|
||||
let prefix = b"batch_get/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -1154,7 +1154,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_batch_delete() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("batch_delete_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("batch_delete-test").await.unwrap();
|
||||
let prefix = b"batch_delete/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -1164,7 +1164,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_batch_delete_with_prefix() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("batch_delete_with_prefix_test")
|
||||
let kv_backend = build_pg_kv_backend("batch_delete_with_prefix-test")
|
||||
.await
|
||||
.unwrap();
|
||||
let prefix = b"batch_delete/";
|
||||
@@ -1176,7 +1176,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_delete_range() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("delete_range_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("delete_range-test").await.unwrap();
|
||||
let prefix = b"delete_range/";
|
||||
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
|
||||
@@ -1186,7 +1186,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_compare_and_put() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("compare_and_put_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("compare_and_put-test").await.unwrap();
|
||||
let prefix = b"compare_and_put/";
|
||||
let kv_backend = Arc::new(kv_backend);
|
||||
test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
|
||||
@@ -1195,7 +1195,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_pg_txn() {
|
||||
maybe_skip_postgres_integration_test!();
|
||||
let kv_backend = build_pg_kv_backend("txn_test").await.unwrap();
|
||||
let kv_backend = build_pg_kv_backend("txn-test").await.unwrap();
|
||||
test_txn_one_compare_op(&kv_backend).await;
|
||||
text_txn_multi_compare_op(&kv_backend).await;
|
||||
test_txn_compare_equal(&kv_backend).await;
|
||||
|
||||
@@ -48,7 +48,7 @@ pub mod stats;
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub mod test_util;
|
||||
pub mod util;
|
||||
pub mod wal_options_allocator;
|
||||
pub mod wal_provider;
|
||||
|
||||
// The id of the datanode.
|
||||
pub type DatanodeId = u64;
|
||||
|
||||
@@ -81,6 +81,13 @@ pub trait PeerAllocator: Send + Sync {
|
||||
|
||||
pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<T: PeerAllocator + ?Sized> PeerAllocator for Arc<T> {
|
||||
async fn alloc(&self, num: usize) -> Result<Vec<Peer>, Error> {
|
||||
T::alloc(self, num).await
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NoopPeerAllocator;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
|
||||
@@ -144,6 +144,8 @@ impl ReconcileRegions {
|
||||
}
|
||||
|
||||
/// Creates a region request builder from a raw table info.
|
||||
///
|
||||
/// Note: This function is primarily intended for creating logical tables or allocating placeholder regions.
|
||||
fn create_region_request_from_raw_table_info(
|
||||
raw_table_info: &RawTableInfo,
|
||||
physical_table_id: TableId,
|
||||
|
||||
@@ -1639,7 +1639,6 @@ mod tests {
|
||||
value_indices: vec![2],
|
||||
engine: METRIC_ENGINE_NAME.to_string(),
|
||||
next_column_id: 0,
|
||||
region_numbers: vec![0],
|
||||
options: Default::default(),
|
||||
created_on: Default::default(),
|
||||
updated_on: Default::default(),
|
||||
|
||||
@@ -19,6 +19,7 @@ use common_telemetry::{debug, warn};
|
||||
use snafu::ensure;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::ddl::allocator::resource_id::ResourceIdAllocator;
|
||||
use crate::error::{self, Result};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::rpc::store::CompareAndPutRequest;
|
||||
@@ -82,6 +83,25 @@ pub struct Sequence {
|
||||
inner: Mutex<Inner>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ResourceIdAllocator for Sequence {
|
||||
async fn next(&self) -> Result<u64> {
|
||||
self.next().await
|
||||
}
|
||||
|
||||
async fn peek(&self) -> Result<u64> {
|
||||
self.peek().await
|
||||
}
|
||||
|
||||
async fn jump_to(&self, next: u64) -> Result<()> {
|
||||
self.jump_to(next).await
|
||||
}
|
||||
|
||||
async fn min_max(&self) -> Range<u64> {
|
||||
self.min_max().await
|
||||
}
|
||||
}
|
||||
|
||||
impl Sequence {
|
||||
/// Returns the next value and increments the sequence.
|
||||
pub async fn next(&self) -> Result<u64> {
|
||||
|
||||
@@ -40,8 +40,8 @@ use crate::peer::{Peer, PeerResolver};
|
||||
use crate::region_keeper::MemoryRegionKeeper;
|
||||
use crate::region_registry::LeaderRegionRegistry;
|
||||
use crate::sequence::SequenceBuilder;
|
||||
use crate::wal_options_allocator::topic_pool::KafkaTopicPool;
|
||||
use crate::wal_options_allocator::{WalOptionsAllocator, build_kafka_topic_creator};
|
||||
use crate::wal_provider::topic_pool::KafkaTopicPool;
|
||||
use crate::wal_provider::{WalProvider, build_kafka_topic_creator};
|
||||
use crate::{DatanodeId, FlownodeId};
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -187,7 +187,7 @@ pub fn new_ddl_context_with_kv_backend(
|
||||
.initial(1024)
|
||||
.build(),
|
||||
),
|
||||
Arc::new(WalOptionsAllocator::default()),
|
||||
Arc::new(WalProvider::default()),
|
||||
));
|
||||
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
|
||||
let flow_metadata_allocator =
|
||||
|
||||
@@ -26,28 +26,46 @@ use common_wal::options::{KafkaWalOptions, WAL_OPTIONS_KEY, WalOptions};
|
||||
use snafu::{ResultExt, ensure};
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
|
||||
use crate::ddl::allocator::wal_options::WalOptionsAllocator;
|
||||
use crate::error::{EncodeWalOptionsSnafu, InvalidTopicNamePrefixSnafu, Result};
|
||||
use crate::key::TOPIC_NAME_PATTERN_REGEX;
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::leadership_notifier::LeadershipChangeListener;
|
||||
pub use crate::wal_options_allocator::topic_creator::{
|
||||
build_kafka_client, build_kafka_topic_creator,
|
||||
};
|
||||
use crate::wal_options_allocator::topic_pool::KafkaTopicPool;
|
||||
pub use crate::wal_provider::topic_creator::{build_kafka_client, build_kafka_topic_creator};
|
||||
use crate::wal_provider::topic_pool::KafkaTopicPool;
|
||||
|
||||
/// Allocates wal options in region granularity.
|
||||
/// Provides wal options in region granularity.
|
||||
#[derive(Default, Debug)]
|
||||
pub enum WalOptionsAllocator {
|
||||
pub enum WalProvider {
|
||||
#[default]
|
||||
RaftEngine,
|
||||
Kafka(KafkaTopicPool),
|
||||
}
|
||||
|
||||
/// Arc wrapper of WalOptionsAllocator.
|
||||
pub type WalOptionsAllocatorRef = Arc<WalOptionsAllocator>;
|
||||
/// Arc wrapper of WalProvider.
|
||||
pub type WalProviderRef = Arc<WalProvider>;
|
||||
|
||||
impl WalOptionsAllocator {
|
||||
/// Tries to start the allocator.
|
||||
#[async_trait::async_trait]
|
||||
impl WalOptionsAllocator for WalProvider {
|
||||
async fn allocate(
|
||||
&self,
|
||||
region_numbers: &[RegionNumber],
|
||||
skip_wal: bool,
|
||||
) -> Result<HashMap<RegionNumber, String>> {
|
||||
let wal_options = self
|
||||
.alloc_batch(region_numbers.len(), skip_wal)?
|
||||
.into_iter()
|
||||
.map(|wal_options| {
|
||||
serde_json::to_string(&wal_options).context(EncodeWalOptionsSnafu { wal_options })
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(region_numbers.iter().copied().zip(wal_options).collect())
|
||||
}
|
||||
}
|
||||
|
||||
impl WalProvider {
|
||||
/// Tries to start the provider.
|
||||
pub async fn start(&self) -> Result<()> {
|
||||
match self {
|
||||
Self::RaftEngine => Ok(()),
|
||||
@@ -56,14 +74,14 @@ impl WalOptionsAllocator {
|
||||
}
|
||||
|
||||
/// Allocates a batch of wal options where each wal options goes to a region.
|
||||
/// If skip_wal is true, the wal options will be set to Noop regardless of the allocator type.
|
||||
/// If skip_wal is true, the wal options will be set to Noop regardless of the provider type.
|
||||
pub fn alloc_batch(&self, num_regions: usize, skip_wal: bool) -> Result<Vec<WalOptions>> {
|
||||
if skip_wal {
|
||||
return Ok(vec![WalOptions::Noop; num_regions]);
|
||||
}
|
||||
match self {
|
||||
WalOptionsAllocator::RaftEngine => Ok(vec![WalOptions::RaftEngine; num_regions]),
|
||||
WalOptionsAllocator::Kafka(topic_manager) => {
|
||||
WalProvider::RaftEngine => Ok(vec![WalOptions::RaftEngine; num_regions]),
|
||||
WalProvider::Kafka(topic_manager) => {
|
||||
let options_batch = topic_manager
|
||||
.select_batch(num_regions)?
|
||||
.into_iter()
|
||||
@@ -80,14 +98,14 @@ impl WalOptionsAllocator {
|
||||
|
||||
/// Returns true if it's the remote WAL.
|
||||
pub fn is_remote_wal(&self) -> bool {
|
||||
matches!(&self, WalOptionsAllocator::Kafka(_))
|
||||
matches!(&self, WalProvider::Kafka(_))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LeadershipChangeListener for WalOptionsAllocator {
|
||||
impl LeadershipChangeListener for WalProvider {
|
||||
fn name(&self) -> &str {
|
||||
"WalOptionsAllocator"
|
||||
"WalProvider"
|
||||
}
|
||||
|
||||
async fn on_leader_start(&self) -> Result<()> {
|
||||
@@ -99,13 +117,13 @@ impl LeadershipChangeListener for WalOptionsAllocator {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a wal options allocator based on the given configuration.
|
||||
pub async fn build_wal_options_allocator(
|
||||
/// Builds a wal provider based on the given configuration.
|
||||
pub async fn build_wal_provider(
|
||||
config: &MetasrvWalConfig,
|
||||
kv_backend: KvBackendRef,
|
||||
) -> Result<WalOptionsAllocator> {
|
||||
) -> Result<WalProvider> {
|
||||
match config {
|
||||
MetasrvWalConfig::RaftEngine => Ok(WalOptionsAllocator::RaftEngine),
|
||||
MetasrvWalConfig::RaftEngine => Ok(WalProvider::RaftEngine),
|
||||
MetasrvWalConfig::Kafka(kafka_config) => {
|
||||
let prefix = &kafka_config.kafka_topic.topic_name_prefix;
|
||||
ensure!(
|
||||
@@ -116,28 +134,11 @@ pub async fn build_wal_options_allocator(
|
||||
build_kafka_topic_creator(&kafka_config.connection, &kafka_config.kafka_topic)
|
||||
.await?;
|
||||
let topic_pool = KafkaTopicPool::new(kafka_config, kv_backend, topic_creator);
|
||||
Ok(WalOptionsAllocator::Kafka(topic_pool))
|
||||
Ok(WalProvider::Kafka(topic_pool))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocates a wal options for each region. The allocated wal options is encoded immediately.
|
||||
pub fn allocate_region_wal_options(
|
||||
regions: Vec<RegionNumber>,
|
||||
wal_options_allocator: &WalOptionsAllocator,
|
||||
skip_wal: bool,
|
||||
) -> Result<HashMap<RegionNumber, String>> {
|
||||
let wal_options = wal_options_allocator
|
||||
.alloc_batch(regions.len(), skip_wal)?
|
||||
.into_iter()
|
||||
.map(|wal_options| {
|
||||
serde_json::to_string(&wal_options).context(EncodeWalOptionsSnafu { wal_options })
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(regions.into_iter().zip(wal_options).collect())
|
||||
}
|
||||
|
||||
/// Inserts wal options into options.
|
||||
pub fn prepare_wal_options(
|
||||
options: &mut HashMap<String, String>,
|
||||
@@ -182,21 +183,19 @@ mod tests {
|
||||
use crate::error::Error;
|
||||
use crate::kv_backend::memory::MemoryKvBackend;
|
||||
use crate::test_util::test_kafka_topic_pool;
|
||||
use crate::wal_options_allocator::selector::RoundRobinTopicSelector;
|
||||
use crate::wal_provider::selector::RoundRobinTopicSelector;
|
||||
|
||||
// Tests that the wal options allocator could successfully allocate raft-engine wal options.
|
||||
// Tests that the wal provider could successfully allocate raft-engine wal options.
|
||||
#[tokio::test]
|
||||
async fn test_allocator_with_raft_engine() {
|
||||
async fn test_provider_with_raft_engine() {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
|
||||
let wal_config = MetasrvWalConfig::RaftEngine;
|
||||
let allocator = build_wal_options_allocator(&wal_config, kv_backend)
|
||||
.await
|
||||
.unwrap();
|
||||
allocator.start().await.unwrap();
|
||||
let provider = build_wal_provider(&wal_config, kv_backend).await.unwrap();
|
||||
provider.start().await.unwrap();
|
||||
|
||||
let num_regions = 32;
|
||||
let regions = (0..num_regions).collect::<Vec<_>>();
|
||||
let got = allocate_region_wal_options(regions.clone(), &allocator, false).unwrap();
|
||||
let got = provider.allocate(&regions, false).await.unwrap();
|
||||
|
||||
let encoded_wal_options = serde_json::to_string(&WalOptions::RaftEngine).unwrap();
|
||||
let expected = regions
|
||||
@@ -216,14 +215,14 @@ mod tests {
|
||||
},
|
||||
..Default::default()
|
||||
});
|
||||
let got = build_wal_options_allocator(&wal_config, kv_backend)
|
||||
let got = build_wal_provider(&wal_config, kv_backend)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert_matches!(got, Error::InvalidTopicNamePrefix { .. });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_allocator_with_kafka_allocate_wal_options() {
|
||||
async fn test_provider_with_kafka_allocate_wal_options() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
maybe_skip_kafka_integration_test!();
|
||||
let num_topics = 5;
|
||||
@@ -240,13 +239,13 @@ mod tests {
|
||||
let topic_creator = topic_pool.topic_creator();
|
||||
topic_creator.delete_topics(&topics).await.unwrap();
|
||||
|
||||
// Creates an options allocator.
|
||||
let allocator = WalOptionsAllocator::Kafka(topic_pool);
|
||||
allocator.start().await.unwrap();
|
||||
// Creates an options provider.
|
||||
let provider = WalProvider::Kafka(topic_pool);
|
||||
provider.start().await.unwrap();
|
||||
|
||||
let num_regions = 3;
|
||||
let regions = (0..num_regions).collect::<Vec<_>>();
|
||||
let got = allocate_region_wal_options(regions.clone(), &allocator, false).unwrap();
|
||||
let got = provider.allocate(&regions, false).await.unwrap();
|
||||
|
||||
// Check the allocated wal options contain the expected topics.
|
||||
let expected = (0..num_regions)
|
||||
@@ -261,13 +260,13 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_allocator_with_skip_wal() {
|
||||
let allocator = WalOptionsAllocator::RaftEngine;
|
||||
allocator.start().await.unwrap();
|
||||
async fn test_provider_with_skip_wal() {
|
||||
let provider = WalProvider::RaftEngine;
|
||||
provider.start().await.unwrap();
|
||||
|
||||
let num_regions = 32;
|
||||
let regions = (0..num_regions).collect::<Vec<_>>();
|
||||
let got = allocate_region_wal_options(regions.clone(), &allocator, true).unwrap();
|
||||
let got = provider.allocate(&regions, true).await.unwrap();
|
||||
assert_eq!(got.len(), num_regions as usize);
|
||||
for wal_options in got.values() {
|
||||
assert_eq!(wal_options, &"{\"wal.provider\":\"noop\"}");
|
||||
@@ -22,9 +22,9 @@ use snafu::ensure;
|
||||
|
||||
use crate::error::{InvalidNumTopicsSnafu, Result};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::wal_options_allocator::selector::{RoundRobinTopicSelector, TopicSelectorRef};
|
||||
use crate::wal_options_allocator::topic_creator::KafkaTopicCreator;
|
||||
use crate::wal_options_allocator::topic_manager::KafkaTopicManager;
|
||||
use crate::wal_provider::selector::{RoundRobinTopicSelector, TopicSelectorRef};
|
||||
use crate::wal_provider::topic_creator::KafkaTopicCreator;
|
||||
use crate::wal_provider::topic_manager::KafkaTopicManager;
|
||||
|
||||
/// Topic pool for kafka remote wal.
|
||||
/// Responsible for:
|
||||
@@ -144,7 +144,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::test_util::test_kafka_topic_pool;
|
||||
use crate::wal_options_allocator::selector::RoundRobinTopicSelector;
|
||||
use crate::wal_provider::selector::RoundRobinTopicSelector;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_pool_invalid_number_topics_err() {
|
||||
@@ -21,6 +21,7 @@ use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_telemetry::tracing::{Span, info_span};
|
||||
use common_time::util::format_nanoseconds_human_readable;
|
||||
use datafusion::arrow::compute::cast;
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
@@ -218,6 +219,7 @@ pub struct RecordBatchStreamAdapter {
|
||||
metrics_2: Metrics,
|
||||
/// Display plan and metrics in verbose mode.
|
||||
explain_verbose: bool,
|
||||
span: Span,
|
||||
}
|
||||
|
||||
/// JSON-encoded metrics. Contains the metrics from a whole plan tree.
|
||||
@@ -238,22 +240,21 @@ impl RecordBatchStreamAdapter {
|
||||
metrics: None,
|
||||
metrics_2: Metrics::Unavailable,
|
||||
explain_verbose: false,
|
||||
span: Span::current(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn try_new_with_metrics_and_df_plan(
|
||||
stream: DfSendableRecordBatchStream,
|
||||
metrics: BaselineMetrics,
|
||||
df_plan: Arc<dyn ExecutionPlan>,
|
||||
) -> Result<Self> {
|
||||
pub fn try_new_with_span(stream: DfSendableRecordBatchStream, span: Span) -> Result<Self> {
|
||||
let schema =
|
||||
Arc::new(Schema::try_from(stream.schema()).context(error::SchemaConversionSnafu)?);
|
||||
let subspan = info_span!(parent: &span, "RecordBatchStreamAdapter");
|
||||
Ok(Self {
|
||||
schema,
|
||||
stream,
|
||||
metrics: Some(metrics),
|
||||
metrics_2: Metrics::Unresolved(df_plan),
|
||||
metrics: None,
|
||||
metrics_2: Metrics::Unavailable,
|
||||
explain_verbose: false,
|
||||
span: subspan,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -300,6 +301,8 @@ impl Stream for RecordBatchStreamAdapter {
|
||||
.map(|m| m.elapsed_compute().clone())
|
||||
.unwrap_or_default();
|
||||
let _guard = timer.timer();
|
||||
let poll_span = info_span!(parent: &self.span, "poll_next");
|
||||
let _entered = poll_span.enter();
|
||||
match Pin::new(&mut self.stream).poll_next(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Some(df_record_batch)) => {
|
||||
|
||||
@@ -29,6 +29,7 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use adapter::RecordBatchMetrics;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_telemetry::tracing::Span;
|
||||
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::arrow::array::{ArrayRef, AsArray, StringBuilder};
|
||||
use datatypes::arrow::compute::SortOptions;
|
||||
@@ -370,6 +371,7 @@ pub struct RecordBatchStreamWrapper<S> {
|
||||
pub stream: S,
|
||||
pub output_ordering: Option<Vec<OrderOption>>,
|
||||
pub metrics: Arc<ArcSwapOption<RecordBatchMetrics>>,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl<S> RecordBatchStreamWrapper<S> {
|
||||
@@ -380,6 +382,7 @@ impl<S> RecordBatchStreamWrapper<S> {
|
||||
stream,
|
||||
output_ordering: None,
|
||||
metrics: Default::default(),
|
||||
span: Span::current(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -408,6 +411,7 @@ impl<S: Stream<Item = Result<RecordBatch>> + Unpin> Stream for RecordBatchStream
|
||||
type Item = Result<RecordBatch>;
|
||||
|
||||
/// Polls the wrapped stream while this wrapper's tracing span is entered.
fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
    // A clone of the span is entered so the guard does not borrow `self`,
    // leaving `self.stream` free to be borrowed mutably below.
    let span_guard = self.span.clone().entered();
    let polled = Pin::new(&mut self.stream).poll_next(ctx);
    // Leave the span only after the inner poll has finished.
    drop(span_guard);
    polled
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
|
||||
use api::v1::meta::GrantedRegion;
|
||||
use async_trait::async_trait;
|
||||
@@ -50,7 +50,7 @@ use crate::region_server::RegionServer;
|
||||
pub struct RegionAliveKeeper {
|
||||
region_server: RegionServer,
|
||||
tasks: Arc<Mutex<HashMap<RegionId, Arc<CountdownTaskHandle>>>>,
|
||||
heartbeat_interval_millis: u64,
|
||||
heartbeat_interval_millis: Arc<AtomicU64>,
|
||||
started: Arc<AtomicBool>,
|
||||
|
||||
/// The epoch when [RegionAliveKeeper] is created. It's used to get a monotonically non-decreasing
|
||||
@@ -67,18 +67,26 @@ impl RegionAliveKeeper {
|
||||
pub fn new(
|
||||
region_server: RegionServer,
|
||||
countdown_task_handler_ext: Option<CountdownTaskHandlerExtRef>,
|
||||
heartbeat_interval_millis: u64,
|
||||
heartbeat_interval: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
region_server,
|
||||
tasks: Arc::new(Mutex::new(HashMap::new())),
|
||||
heartbeat_interval_millis,
|
||||
heartbeat_interval_millis: Arc::new(AtomicU64::new(
|
||||
heartbeat_interval.as_millis() as u64
|
||||
)),
|
||||
started: Arc::new(AtomicBool::new(false)),
|
||||
epoch: Instant::now(),
|
||||
countdown_task_handler_ext,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the heartbeat interval with the value received from Metasrv.
|
||||
pub fn update_heartbeat_interval(&self, heartbeat_interval_millis: u64) {
|
||||
self.heartbeat_interval_millis
|
||||
.store(heartbeat_interval_millis, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Looks up the countdown task handle registered for `region_id`.
///
/// Returns a clone of the shared handle, or `None` when no task is tracked for that region.
async fn find_handle(&self, region_id: RegionId) -> Option<Arc<CountdownTaskHandle>> {
    // The task map is locked only for the duration of this lookup.
    self.tasks.lock().await.get(&region_id).cloned()
}
|
||||
@@ -108,7 +116,9 @@ impl RegionAliveKeeper {
|
||||
};
|
||||
|
||||
if should_start {
|
||||
handle.start(self.heartbeat_interval_millis).await;
|
||||
handle
|
||||
.start(self.heartbeat_interval_millis.load(Ordering::Relaxed))
|
||||
.await;
|
||||
info!("Region alive countdown for region {region_id} is started!");
|
||||
} else {
|
||||
info!(
|
||||
@@ -230,8 +240,9 @@ impl RegionAliveKeeper {
|
||||
}
|
||||
|
||||
let tasks = self.tasks.lock().await;
|
||||
let interval = self.heartbeat_interval_millis.load(Ordering::Relaxed);
|
||||
for task in tasks.values() {
|
||||
task.start(self.heartbeat_interval_millis).await;
|
||||
task.start(interval).await;
|
||||
}
|
||||
|
||||
info!(
|
||||
@@ -505,7 +516,11 @@ mod test {
|
||||
let engine = Arc::new(engine);
|
||||
region_server.register_engine(engine.clone());
|
||||
|
||||
let alive_keeper = Arc::new(RegionAliveKeeper::new(region_server.clone(), None, 100));
|
||||
let alive_keeper = Arc::new(RegionAliveKeeper::new(
|
||||
region_server.clone(),
|
||||
None,
|
||||
Duration::from_millis(100),
|
||||
));
|
||||
|
||||
let region_id = RegionId::new(1024, 1);
|
||||
let builder = CreateRequestBuilder::new();
|
||||
|
||||
@@ -29,7 +29,6 @@ pub(crate) use object_store::config::ObjectStoreConfig;
|
||||
use query::options::QueryOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
use servers::http::HttpOptions;
|
||||
|
||||
/// Storage engine config
|
||||
@@ -71,7 +70,6 @@ pub struct DatanodeOptions {
|
||||
pub init_regions_in_background: bool,
|
||||
pub init_regions_parallelism: usize,
|
||||
pub grpc: GrpcOptions,
|
||||
pub heartbeat: HeartbeatOptions,
|
||||
pub http: HttpOptions,
|
||||
pub meta_client: Option<MetaClientOptions>,
|
||||
pub wal: DatanodeWalConfig,
|
||||
@@ -134,7 +132,6 @@ impl Default for DatanodeOptions {
|
||||
RegionEngineConfig::File(FileEngineConfig::default()),
|
||||
],
|
||||
logging: LoggingOptions::default(),
|
||||
heartbeat: HeartbeatOptions::datanode_default(),
|
||||
enable_telemetry: true,
|
||||
tracing: TracingOptions::default(),
|
||||
query: QueryOptions::default(),
|
||||
|
||||
@@ -22,7 +22,7 @@ use api::v1::meta::{DatanodeWorkloads, HeartbeatRequest, NodeInfo, Peer, RegionR
|
||||
use common_base::Plugins;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::datanode::REGION_STATISTIC_KEY;
|
||||
use common_meta::distributed_time_constants::META_KEEP_ALIVE_INTERVAL_SECS;
|
||||
use common_meta::distributed_time_constants::BASE_HEARTBEAT_INTERVAL;
|
||||
use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::suspend::SuspendHandler;
|
||||
@@ -35,6 +35,7 @@ use common_stat::ResourceStatRef;
|
||||
use common_telemetry::{debug, error, info, trace, warn};
|
||||
use common_workload::DatanodeWorkloadType;
|
||||
use meta_client::MetaClientRef;
|
||||
use meta_client::client::heartbeat::HeartbeatConfig;
|
||||
use meta_client::client::{HeartbeatSender, MetaClient};
|
||||
use servers::addrs;
|
||||
use snafu::{OptionExt as _, ResultExt};
|
||||
@@ -61,7 +62,6 @@ pub struct HeartbeatTask {
|
||||
running: Arc<AtomicBool>,
|
||||
meta_client: MetaClientRef,
|
||||
region_server: RegionServer,
|
||||
interval: u64,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
region_alive_keeper: Arc<RegionAliveKeeper>,
|
||||
resource_stat: ResourceStatRef,
|
||||
@@ -87,7 +87,7 @@ impl HeartbeatTask {
|
||||
let region_alive_keeper = Arc::new(RegionAliveKeeper::new(
|
||||
region_server.clone(),
|
||||
countdown_task_handler_ext,
|
||||
opts.heartbeat.interval.as_millis() as u64,
|
||||
BASE_HEARTBEAT_INTERVAL,
|
||||
));
|
||||
let resp_handler_executor = Arc::new(HandlerGroupExecutor::new(vec![
|
||||
region_alive_keeper.clone(),
|
||||
@@ -109,7 +109,6 @@ impl HeartbeatTask {
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
meta_client,
|
||||
region_server,
|
||||
interval: opts.heartbeat.interval.as_millis() as u64,
|
||||
resp_handler_executor,
|
||||
region_alive_keeper,
|
||||
resource_stat,
|
||||
@@ -123,9 +122,9 @@ impl HeartbeatTask {
|
||||
mailbox: MailboxRef,
|
||||
mut notify: Option<Arc<Notify>>,
|
||||
quit_signal: Arc<Notify>,
|
||||
) -> Result<HeartbeatSender> {
|
||||
) -> Result<(HeartbeatSender, HeartbeatConfig)> {
|
||||
let client_id = meta_client.id();
|
||||
let (tx, mut rx) = meta_client.heartbeat().await.context(MetaClientInitSnafu)?;
|
||||
let (tx, mut rx, config) = meta_client.heartbeat().await.context(MetaClientInitSnafu)?;
|
||||
|
||||
let mut last_received_lease = Instant::now();
|
||||
|
||||
@@ -175,7 +174,7 @@ impl HeartbeatTask {
|
||||
quit_signal.notify_one();
|
||||
info!("Heartbeat handling loop exit.");
|
||||
});
|
||||
Ok(tx)
|
||||
Ok((tx, config))
|
||||
}
|
||||
|
||||
async fn handle_response(
|
||||
@@ -204,13 +203,9 @@ impl HeartbeatTask {
|
||||
warn!("Heartbeat task started multiple times");
|
||||
return Ok(());
|
||||
}
|
||||
let interval = self.interval;
|
||||
let node_id = self.node_id;
|
||||
let node_epoch = self.node_epoch;
|
||||
let addr = &self.peer_addr;
|
||||
info!(
|
||||
"Starting heartbeat to Metasrv with interval {interval}. My node id is {node_id}, address is {addr}."
|
||||
);
|
||||
|
||||
let meta_client = self.meta_client.clone();
|
||||
let region_server_clone = self.region_server.clone();
|
||||
@@ -222,7 +217,7 @@ impl HeartbeatTask {
|
||||
|
||||
let quit_signal = Arc::new(Notify::new());
|
||||
|
||||
let mut tx = Self::create_streams(
|
||||
let (mut tx, config) = Self::create_streams(
|
||||
&meta_client,
|
||||
running.clone(),
|
||||
handler_executor.clone(),
|
||||
@@ -232,6 +227,17 @@ impl HeartbeatTask {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let interval = config.interval.as_millis() as u64;
|
||||
let mut retry_interval = config.retry_interval;
|
||||
|
||||
// Update RegionAliveKeeper with the interval from Metasrv
|
||||
self.region_alive_keeper.update_heartbeat_interval(interval);
|
||||
|
||||
info!(
|
||||
"Starting heartbeat to Metasrv with config: {}. My node id is {}, address is {}.",
|
||||
config, node_id, addr
|
||||
);
|
||||
|
||||
let self_peer = Some(Peer {
|
||||
id: node_id,
|
||||
addr: addr.clone(),
|
||||
@@ -244,6 +250,7 @@ impl HeartbeatTask {
|
||||
let total_cpu_millicores = self.resource_stat.get_total_cpu_millicores();
|
||||
let total_memory_bytes = self.resource_stat.get_total_memory_bytes();
|
||||
let resource_stat = self.resource_stat.clone();
|
||||
let region_alive_keeper = self.region_alive_keeper.clone();
|
||||
let gc_limiter = self
|
||||
.region_server
|
||||
.mito_engine()
|
||||
@@ -363,20 +370,23 @@ impl HeartbeatTask {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(new_tx) => {
|
||||
info!("Reconnected to metasrv");
|
||||
Ok((new_tx, new_config)) => {
|
||||
info!("Reconnected to metasrv, heartbeat config: {}", new_config);
|
||||
tx = new_tx;
|
||||
// Update retry_interval from new config
|
||||
retry_interval = new_config.retry_interval;
|
||||
// Update region_alive_keeper's heartbeat interval
|
||||
region_alive_keeper.update_heartbeat_interval(
|
||||
new_config.interval.as_millis() as u64,
|
||||
);
|
||||
// Triggers to send heartbeat immediately.
|
||||
sleep.as_mut().reset(Instant::now());
|
||||
}
|
||||
Err(e) => {
|
||||
// Before the META_LEASE_SECS expires,
|
||||
// any retries are meaningless, it always reads the old meta leader address.
|
||||
// Triggers to retry after META_KEEP_ALIVE_INTERVAL_SECS.
|
||||
sleep.as_mut().reset(
|
||||
Instant::now()
|
||||
+ Duration::from_secs(META_KEEP_ALIVE_INTERVAL_SECS),
|
||||
);
|
||||
// Triggers to retry after retry_interval from Metasrv config.
|
||||
sleep.as_mut().reset(Instant::now() + retry_interval);
|
||||
error!(e; "Failed to reconnect to metasrv!");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
|
||||
use common_meta::wal_options_allocator::prepare_wal_options;
|
||||
use common_meta::wal_provider::prepare_wal_options;
|
||||
use store_api::path_utils::table_dir;
|
||||
use store_api::region_request::{PathType, RegionOpenRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
@@ -1641,7 +1641,10 @@ mod tests {
|
||||
let response = mock_region_server
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
|
||||
RegionRequest::Drop(RegionDropRequest {
|
||||
fast_path: false,
|
||||
force: false,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1739,7 +1742,10 @@ mod tests {
|
||||
mock_region_server
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
|
||||
RegionRequest::Drop(RegionDropRequest {
|
||||
fast_path: false,
|
||||
force: false,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
@@ -18,7 +18,7 @@ use common_meta::DatanodeId;
|
||||
use common_meta::key::datanode_table::DatanodeTableManager;
|
||||
use common_meta::key::topic_region::{TopicRegionKey, TopicRegionManager, TopicRegionValue};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::wal_options_allocator::{extract_topic_from_wal_options, prepare_wal_options};
|
||||
use common_meta::wal_provider::{extract_topic_from_wal_options, prepare_wal_options};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::ResultExt;
|
||||
use store_api::path_utils::table_dir;
|
||||
|
||||
@@ -79,7 +79,7 @@ tokio.workspace = true
|
||||
tonic.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
catalog.workspace = true
|
||||
catalog = { workspace = true, features = ["testing"] }
|
||||
common-catalog.workspace = true
|
||||
pretty_assertions.workspace = true
|
||||
prost.workspace = true
|
||||
|
||||
@@ -39,7 +39,6 @@ use query::QueryEngine;
|
||||
use query::options::QueryOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
use servers::http::HttpOptions;
|
||||
use session::context::QueryContext;
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
@@ -111,7 +110,6 @@ pub struct FlownodeOptions {
|
||||
pub meta_client: Option<MetaClientOptions>,
|
||||
pub logging: LoggingOptions,
|
||||
pub tracing: TracingOptions,
|
||||
pub heartbeat: HeartbeatOptions,
|
||||
pub query: QueryOptions,
|
||||
pub user_provider: Option<String>,
|
||||
pub memory: MemoryOptions,
|
||||
@@ -127,7 +125,6 @@ impl Default for FlownodeOptions {
|
||||
meta_client: None,
|
||||
logging: LoggingOptions::default(),
|
||||
tracing: TracingOptions::default(),
|
||||
heartbeat: HeartbeatOptions::default(),
|
||||
// flownode's query option is set to 1 to throttle flow's query so
|
||||
// that it won't use too much cpu or memory
|
||||
query: QueryOptions {
|
||||
|
||||
@@ -24,7 +24,7 @@ use super::*;
|
||||
pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
table_id: TableId,
|
||||
table_name: &str,
|
||||
region_numbers: I,
|
||||
_region_numbers: I,
|
||||
) -> TableInfo {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
|
||||
@@ -46,7 +46,6 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
|
||||
.primary_key_indices(vec![0])
|
||||
.engine("engine")
|
||||
.next_column_id(3)
|
||||
.region_numbers(region_numbers.into_iter().collect::<Vec<_>>())
|
||||
.build()
|
||||
.unwrap();
|
||||
TableInfoBuilder::default()
|
||||
|
||||
@@ -30,7 +30,6 @@ use common_telemetry::{debug, error, info, warn};
|
||||
use greptime_proto::v1::meta::NodeInfo;
|
||||
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
|
||||
use servers::addrs;
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
use snafu::ResultExt;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::Duration;
|
||||
@@ -64,8 +63,6 @@ pub struct HeartbeatTask {
|
||||
node_epoch: u64,
|
||||
peer_addr: String,
|
||||
meta_client: Arc<MetaClient>,
|
||||
report_interval: Duration,
|
||||
retry_interval: Duration,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
running: Arc<AtomicBool>,
|
||||
query_stat_size: Option<SizeReportSender>,
|
||||
@@ -81,7 +78,6 @@ impl HeartbeatTask {
|
||||
pub fn new(
|
||||
opts: &FlownodeOptions,
|
||||
meta_client: Arc<MetaClient>,
|
||||
heartbeat_opts: HeartbeatOptions,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
resource_stat: ResourceStatRef,
|
||||
) -> Self {
|
||||
@@ -90,8 +86,6 @@ impl HeartbeatTask {
|
||||
node_epoch: common_time::util::current_time_millis() as u64,
|
||||
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
|
||||
meta_client,
|
||||
report_interval: heartbeat_opts.interval,
|
||||
retry_interval: heartbeat_opts.retry_interval,
|
||||
resp_handler_executor,
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
query_stat_size: None,
|
||||
@@ -113,22 +107,26 @@ impl HeartbeatTask {
|
||||
}
|
||||
|
||||
async fn create_streams(&self) -> Result<(), Error> {
|
||||
info!("Start to establish the heartbeat connection to metasrv.");
|
||||
let (req_sender, resp_stream) = self
|
||||
info!("Establishing heartbeat connection to Metasrv...");
|
||||
|
||||
let (req_sender, resp_stream, config) = self
|
||||
.meta_client
|
||||
.heartbeat()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
info!("Flownode's heartbeat connection has been established with metasrv");
|
||||
info!(
|
||||
"Heartbeat started for flownode {}, Metasrv config: {}",
|
||||
self.node_id, config
|
||||
);
|
||||
|
||||
let (outgoing_tx, outgoing_rx) = mpsc::channel(16);
|
||||
let mailbox = Arc::new(HeartbeatMailbox::new(outgoing_tx));
|
||||
|
||||
self.start_handle_resp_stream(resp_stream, mailbox);
|
||||
self.start_handle_resp_stream(resp_stream, mailbox, config.retry_interval);
|
||||
|
||||
self.start_heartbeat_report(req_sender, outgoing_rx);
|
||||
self.start_heartbeat_report(req_sender, outgoing_rx, config.interval);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -217,8 +215,8 @@ impl HeartbeatTask {
|
||||
&self,
|
||||
req_sender: HeartbeatSender,
|
||||
mut outgoing_rx: mpsc::Receiver<OutgoingMessage>,
|
||||
report_interval: Duration,
|
||||
) {
|
||||
let report_interval = self.report_interval;
|
||||
let node_epoch = self.node_epoch;
|
||||
let self_peer = Some(Peer {
|
||||
id: self.node_id,
|
||||
@@ -277,9 +275,13 @@ impl HeartbeatTask {
|
||||
});
|
||||
}
|
||||
|
||||
fn start_handle_resp_stream(&self, mut resp_stream: HeartbeatStream, mailbox: MailboxRef) {
|
||||
fn start_handle_resp_stream(
|
||||
&self,
|
||||
mut resp_stream: HeartbeatStream,
|
||||
mailbox: MailboxRef,
|
||||
retry_interval: Duration,
|
||||
) {
|
||||
let capture_self = self.clone();
|
||||
let retry_interval = self.retry_interval;
|
||||
|
||||
let _handle = common_runtime::spawn_hb(async move {
|
||||
loop {
|
||||
|
||||
@@ -25,7 +25,6 @@ use meta_client::MetaClientOptions;
|
||||
use query::options::QueryOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
use servers::http::HttpOptions;
|
||||
use servers::server::ServerHandlers;
|
||||
use snafu::ResultExt;
|
||||
@@ -45,7 +44,6 @@ pub struct FrontendOptions {
|
||||
pub node_id: Option<String>,
|
||||
pub default_timezone: Option<String>,
|
||||
pub default_column_prefix: Option<String>,
|
||||
pub heartbeat: HeartbeatOptions,
|
||||
/// Maximum total memory for all concurrent write request bodies and messages (HTTP, gRPC, Flight).
|
||||
/// Set to 0 to disable the limit. Default: "0" (unlimited)
|
||||
pub max_in_flight_write_bytes: ReadableSize,
|
||||
@@ -82,7 +80,6 @@ impl Default for FrontendOptions {
|
||||
node_id: None,
|
||||
default_timezone: None,
|
||||
default_column_prefix: None,
|
||||
heartbeat: HeartbeatOptions::frontend_default(),
|
||||
max_in_flight_write_bytes: ReadableSize(0),
|
||||
write_bytes_exhausted_policy: OnExhaustedPolicy::default(),
|
||||
http: HttpOptions::default(),
|
||||
@@ -406,10 +403,6 @@ mod tests {
|
||||
..Default::default()
|
||||
},
|
||||
meta_client: Some(meta_client_options.clone()),
|
||||
heartbeat: HeartbeatOptions {
|
||||
interval: Duration::from_secs(1),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -419,7 +412,11 @@ mod tests {
|
||||
let meta_client = create_meta_client(&meta_client_options, server.clone()).await;
|
||||
let frontend = create_frontend(&options, meta_client).await?;
|
||||
|
||||
let frontend_heartbeat_interval = options.heartbeat.interval;
|
||||
use common_meta::distributed_time_constants::{
|
||||
BASE_HEARTBEAT_INTERVAL, frontend_heartbeat_interval,
|
||||
};
|
||||
let frontend_heartbeat_interval =
|
||||
frontend_heartbeat_interval(BASE_HEARTBEAT_INTERVAL) + Duration::from_secs(1);
|
||||
tokio::time::sleep(frontend_heartbeat_interval).await;
|
||||
// initial state: not suspend:
|
||||
assert!(!frontend.instance.is_suspended());
|
||||
|
||||
@@ -42,8 +42,6 @@ use crate::metrics::{HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT};
|
||||
pub struct HeartbeatTask {
|
||||
peer_addr: String,
|
||||
meta_client: Arc<MetaClient>,
|
||||
report_interval: Duration,
|
||||
retry_interval: Duration,
|
||||
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
|
||||
start_time_ms: u64,
|
||||
resource_stat: ResourceStatRef,
|
||||
@@ -66,8 +64,6 @@ impl HeartbeatTask {
|
||||
addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr))
|
||||
},
|
||||
meta_client,
|
||||
report_interval: opts.heartbeat.interval,
|
||||
retry_interval: opts.heartbeat.retry_interval,
|
||||
resp_handler_executor,
|
||||
start_time_ms: common_time::util::current_time_millis() as u64,
|
||||
resource_stat,
|
||||
@@ -75,27 +71,31 @@ impl HeartbeatTask {
|
||||
}
|
||||
|
||||
pub async fn start(&self) -> Result<()> {
|
||||
let (req_sender, resp_stream) = self
|
||||
let (req_sender, resp_stream, config) = self
|
||||
.meta_client
|
||||
.heartbeat()
|
||||
.await
|
||||
.context(error::CreateMetaHeartbeatStreamSnafu)?;
|
||||
|
||||
info!("A heartbeat connection has been established with metasrv");
|
||||
info!("Heartbeat started with Metasrv config: {}", config);
|
||||
|
||||
let (outgoing_tx, outgoing_rx) = mpsc::channel(16);
|
||||
let mailbox = Arc::new(HeartbeatMailbox::new(outgoing_tx));
|
||||
|
||||
self.start_handle_resp_stream(resp_stream, mailbox);
|
||||
self.start_handle_resp_stream(resp_stream, mailbox, config.retry_interval);
|
||||
|
||||
self.start_heartbeat_report(req_sender, outgoing_rx);
|
||||
self.start_heartbeat_report(req_sender, outgoing_rx, config.interval);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn start_handle_resp_stream(&self, mut resp_stream: HeartbeatStream, mailbox: MailboxRef) {
|
||||
fn start_handle_resp_stream(
|
||||
&self,
|
||||
mut resp_stream: HeartbeatStream,
|
||||
mailbox: MailboxRef,
|
||||
retry_interval: Duration,
|
||||
) {
|
||||
let capture_self = self.clone();
|
||||
let retry_interval = self.retry_interval;
|
||||
|
||||
let _handle = common_runtime::spawn_hb(async move {
|
||||
loop {
|
||||
@@ -190,8 +190,8 @@ impl HeartbeatTask {
|
||||
&self,
|
||||
req_sender: HeartbeatSender,
|
||||
mut outgoing_rx: Receiver<OutgoingMessage>,
|
||||
report_interval: Duration,
|
||||
) {
|
||||
let report_interval = self.report_interval;
|
||||
let start_time_ms = self.start_time_ms;
|
||||
let self_peer = Some(Peer {
|
||||
// The node id will be actually calculated from its address (by hashing the address
|
||||
|
||||
@@ -91,6 +91,7 @@ use sql::statements::tql::Tql;
|
||||
use sqlparser::ast::ObjectName;
|
||||
pub use standalone::StandaloneDatanodeManager;
|
||||
use table::requests::{OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM};
|
||||
use tracing::Span;
|
||||
|
||||
use crate::error::{
|
||||
self, Error, ExecLogicalPlanSnafu, ExecutePromqlSnafu, ExternalSnafu, InvalidSqlSnafu,
|
||||
@@ -508,6 +509,7 @@ fn attach_timeout(output: Output, mut timeout: Duration) -> Result<Output> {
|
||||
stream: s,
|
||||
output_ordering: None,
|
||||
metrics: Default::default(),
|
||||
span: Span::current(),
|
||||
};
|
||||
Output::new(OutputData::Stream(Box::pin(stream)), output.meta)
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ use datafusion::datasource::DefaultTableSource;
|
||||
use futures::Stream;
|
||||
use futures::stream::StreamExt;
|
||||
use query::parser::PromQuery;
|
||||
use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
|
||||
use servers::interceptor::{GrpcQueryInterceptor, GrpcQueryInterceptorRef};
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
use servers::query_handler::sql::SqlQueryHandler;
|
||||
@@ -73,10 +74,20 @@ impl GrpcQueryHandler for Instance {
|
||||
|
||||
let output = match request {
|
||||
Request::Inserts(requests) => self.handle_inserts(requests, ctx.clone()).await?,
|
||||
Request::RowInserts(requests) => {
|
||||
self.handle_row_inserts(requests, ctx.clone(), false, false)
|
||||
Request::RowInserts(requests) => match ctx.extension(PHYSICAL_TABLE_PARAM) {
|
||||
Some(physical_table) => {
|
||||
self.handle_metric_row_inserts(
|
||||
requests,
|
||||
ctx.clone(),
|
||||
physical_table.to_string(),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
}
|
||||
None => {
|
||||
self.handle_row_inserts(requests, ctx.clone(), false, false)
|
||||
.await?
|
||||
}
|
||||
},
|
||||
Request::Deletes(requests) => self.handle_deletes(requests, ctx.clone()).await?,
|
||||
Request::RowDeletes(requests) => self.handle_row_deletes(requests, ctx.clone()).await?,
|
||||
Request::Query(query_request) => {
|
||||
|
||||
@@ -40,6 +40,7 @@ use servers::query_handler::{
|
||||
};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tracing::instrument;
|
||||
|
||||
use crate::error::{
|
||||
CatalogSnafu, ExecLogicalPlanSnafu, PromStoreRemoteQueryPlanSnafu, ReadTableSnafu, Result,
|
||||
@@ -78,6 +79,7 @@ fn negotiate_response_type(accepted_response_types: &[i32]) -> ServerResult<Resp
|
||||
Ok(ResponseType::try_from(*response_type).unwrap())
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(table_name))]
|
||||
async fn to_query_result(table_name: &str, output: Output) -> ServerResult<QueryResult> {
|
||||
let OutputData::Stream(stream) = output.data else {
|
||||
unreachable!()
|
||||
@@ -194,6 +196,7 @@ impl PromStoreProtocolHandler for Instance {
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(table_name))]
|
||||
async fn read(
|
||||
&self,
|
||||
request: ReadRequest,
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_telemetry::tracing;
|
||||
use promql_parser::label::{MatchOp, Matcher, Matchers};
|
||||
use query::promql;
|
||||
use query::promql::planner::PromPlanner;
|
||||
use servers::prom_store::{DATABASE_LABEL, SCHEMA_LABEL};
|
||||
use servers::prom_store::is_database_selection_label;
|
||||
use servers::prometheus;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -112,7 +112,7 @@ impl Instance {
|
||||
let table_schema = matchers
|
||||
.iter()
|
||||
.find_map(|m| {
|
||||
if (m.name == SCHEMA_LABEL || m.name == DATABASE_LABEL) && m.op == MatchOp::Equal {
|
||||
if is_database_selection_label(&m.name) && m.op == MatchOp::Equal {
|
||||
Some(m.value.clone())
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -97,12 +97,16 @@ impl Datanode for RegionInvoker {
|
||||
}
|
||||
|
||||
async fn handle_query(&self, request: QueryRequest) -> MetaResult<SendableRecordBatchStream> {
|
||||
let region_id = request.region_id.to_string();
|
||||
let span = request
|
||||
.header
|
||||
.as_ref()
|
||||
.map(|h| TracingContext::from_w3c(&h.tracing_context))
|
||||
.unwrap_or_default()
|
||||
.attach(tracing::info_span!("RegionInvoker::handle_query"));
|
||||
.attach(tracing::info_span!(
|
||||
"RegionInvoker::handle_query",
|
||||
region_id = region_id
|
||||
));
|
||||
self.region_server
|
||||
.handle_read(request)
|
||||
.trace(span)
|
||||
|
||||
@@ -44,7 +44,7 @@ async fn run() {
|
||||
// required only when the heartbeat_client is enabled
|
||||
meta_client.ask_leader().await.unwrap();
|
||||
|
||||
let (sender, mut receiver) = meta_client.heartbeat().await.unwrap();
|
||||
let (sender, mut receiver, _config) = meta_client.heartbeat().await.unwrap();
|
||||
|
||||
// send heartbeats
|
||||
let _handle = tokio::spawn(async move {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod ask_leader;
|
||||
mod heartbeat;
|
||||
pub mod heartbeat;
|
||||
mod load_balance;
|
||||
mod procedure;
|
||||
|
||||
@@ -57,7 +57,7 @@ use common_meta::rpc::store::{
|
||||
};
|
||||
use common_telemetry::info;
|
||||
use futures::TryStreamExt;
|
||||
use heartbeat::Client as HeartbeatClient;
|
||||
use heartbeat::{Client as HeartbeatClient, HeartbeatConfig};
|
||||
use procedure::Client as ProcedureClient;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store::Client as StoreClient;
|
||||
@@ -594,7 +594,9 @@ impl MetaClient {
|
||||
/// The `datanode` needs to use the sender to continuously send heartbeat
|
||||
/// packets (some self-state data), and the receiver can receive a response
|
||||
/// from "metasrv" (which may contain some scheduling instructions).
|
||||
pub async fn heartbeat(&self) -> Result<(HeartbeatSender, HeartbeatStream)> {
|
||||
///
|
||||
/// Returns the heartbeat sender, stream, and configuration received from Metasrv.
|
||||
pub async fn heartbeat(&self) -> Result<(HeartbeatSender, HeartbeatStream, HeartbeatConfig)> {
|
||||
self.heartbeat_client()?.heartbeat().await
|
||||
}
|
||||
|
||||
@@ -873,7 +875,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_heartbeat() {
|
||||
let tc = new_client("test_heartbeat").await;
|
||||
let (sender, mut receiver) = tc.client.heartbeat().await.unwrap();
|
||||
let (sender, mut receiver, _config) = tc.client.heartbeat().await.unwrap();
|
||||
// send heartbeats
|
||||
|
||||
let request_sent = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
@@ -12,14 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::heartbeat_client::HeartbeatClient;
|
||||
use api::v1::meta::{HeartbeatRequest, HeartbeatResponse, RequestHeader, Role};
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use common_meta::distributed_time_constants::BASE_HEARTBEAT_INTERVAL;
|
||||
use common_meta::util;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{info, warn};
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use tokio::sync::{RwLock, mpsc};
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
@@ -32,6 +35,52 @@ use crate::client::{Id, LeaderProviderRef};
|
||||
use crate::error;
|
||||
use crate::error::{InvalidResponseHeaderSnafu, Result};
|
||||
|
||||
/// Heartbeat configuration received from Metasrv during handshake.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct HeartbeatConfig {
|
||||
pub interval: Duration,
|
||||
pub retry_interval: Duration,
|
||||
}
|
||||
|
||||
impl Default for HeartbeatConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval: BASE_HEARTBEAT_INTERVAL,
|
||||
retry_interval: BASE_HEARTBEAT_INTERVAL,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for HeartbeatConfig {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"interval={:?}, retry={:?}",
|
||||
self.interval, self.retry_interval
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl HeartbeatConfig {
|
||||
/// Extract configuration from HeartbeatResponse.
|
||||
pub fn from_response(res: &HeartbeatResponse) -> Self {
|
||||
if let Some(cfg) = &res.heartbeat_config {
|
||||
// Metasrv provided complete configuration
|
||||
Self {
|
||||
interval: Duration::from_millis(cfg.heartbeat_interval_ms),
|
||||
retry_interval: Duration::from_millis(cfg.retry_interval_ms),
|
||||
}
|
||||
} else {
|
||||
let fallback = Self::default();
|
||||
warn!(
|
||||
"Metasrv didn't provide heartbeat_config, using default: {}",
|
||||
fallback
|
||||
);
|
||||
fallback
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HeartbeatSender {
|
||||
id: Id,
|
||||
role: Role,
|
||||
@@ -130,7 +179,9 @@ impl Client {
|
||||
inner.ask_leader().await
|
||||
}
|
||||
|
||||
pub async fn heartbeat(&mut self) -> Result<(HeartbeatSender, HeartbeatStream)> {
|
||||
pub async fn heartbeat(
|
||||
&mut self,
|
||||
) -> Result<(HeartbeatSender, HeartbeatStream, HeartbeatConfig)> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.ask_leader().await?;
|
||||
inner.heartbeat().await
|
||||
@@ -198,7 +249,7 @@ impl Inner {
|
||||
leader_provider.ask_leader().await
|
||||
}
|
||||
|
||||
async fn heartbeat(&self) -> Result<(HeartbeatSender, HeartbeatStream)> {
|
||||
async fn heartbeat(&self) -> Result<(HeartbeatSender, HeartbeatStream, HeartbeatConfig)> {
|
||||
ensure!(
|
||||
self.is_started(),
|
||||
error::IllegalGrpcClientStateSnafu {
|
||||
@@ -245,14 +296,18 @@ impl Inner {
|
||||
.map_err(error::Error::from)?
|
||||
.context(error::CreateHeartbeatStreamSnafu)?;
|
||||
|
||||
// Extract heartbeat configuration from handshake response
|
||||
let config = HeartbeatConfig::from_response(&res);
|
||||
|
||||
info!(
|
||||
"Success to create heartbeat stream to server: {}, response: {:#?}",
|
||||
leader_addr, res
|
||||
"Handshake successful with Metasrv at {}, received config: {}",
|
||||
leader_addr, config
|
||||
);
|
||||
|
||||
Ok((
|
||||
HeartbeatSender::new(self.id, self.role, sender),
|
||||
HeartbeatStream::new(self.id, stream),
|
||||
config,
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -149,7 +149,7 @@ impl<'a> ElectionSqlFactory<'a> {
|
||||
}
|
||||
|
||||
fn delete_value_sql(&self) -> String {
|
||||
format!("DELETE FROM {} WHERE k = ?;", self.table_name)
|
||||
format!("DELETE FROM `{}` WHERE k = ?;", self.table_name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1014,7 +1014,7 @@ mod tests {
|
||||
execution_timeout,
|
||||
Duration::from_secs(1),
|
||||
wait_timeout,
|
||||
table_name.unwrap_or("default_greptime_metakv_election"),
|
||||
table_name.unwrap_or("default_greptime_metakv-election"),
|
||||
);
|
||||
client.maybe_init_client().await?;
|
||||
if table_name.is_some() {
|
||||
@@ -1025,7 +1025,7 @@ mod tests {
|
||||
|
||||
async fn drop_table(client: &Mutex<ElectionMysqlClient>, table_name: &str) {
|
||||
let mut client = client.lock().await;
|
||||
let sql = format!("DROP TABLE IF EXISTS {};", table_name);
|
||||
let sql = format!("DROP TABLE IF EXISTS `{}`;", table_name);
|
||||
client.execute(sqlx::query(&sql), &sql).await.unwrap();
|
||||
}
|
||||
|
||||
@@ -1036,7 +1036,7 @@ mod tests {
|
||||
let value = "test_value".to_string();
|
||||
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_mysql_crud_greptime_metakv";
|
||||
let table_name = "test_mysql_crud_greptime-metakv";
|
||||
let candidate_lease_ttl = Duration::from_secs(10);
|
||||
let meta_lease_ttl = Duration::from_secs(2);
|
||||
|
||||
@@ -1050,7 +1050,7 @@ mod tests {
|
||||
let mut a = client.lock().await;
|
||||
let txn = a.transaction().await.unwrap();
|
||||
let mut executor = Executor::Txn(txn);
|
||||
let raw_query = format!("SELECT * FROM {} FOR UPDATE;", table_name);
|
||||
let raw_query = format!("SELECT * FROM `{}` FOR UPDATE;", table_name);
|
||||
let query = sqlx::query(&raw_query);
|
||||
let _ = executor.query(query, &raw_query).await.unwrap();
|
||||
}
|
||||
@@ -1186,7 +1186,7 @@ mod tests {
|
||||
let meta_lease_ttl = Duration::from_secs(2);
|
||||
let idle_session_timeout = Duration::from_secs(0);
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_candidate_registration_greptime_metakv";
|
||||
let table_name = "test_candidate_registration_greptime-metakv";
|
||||
let mut handles = vec![];
|
||||
let client = create_mysql_client(Some(table_name), execution_timeout, idle_session_timeout)
|
||||
.await
|
||||
@@ -1256,7 +1256,7 @@ mod tests {
|
||||
let mut client = election.client.lock().await;
|
||||
let txn = client.transaction().await.unwrap();
|
||||
let mut executor = Executor::Txn(txn);
|
||||
let raw_query = format!("SELECT * FROM {} FOR UPDATE;", table_name);
|
||||
let raw_query = format!("SELECT * FROM `{}` FOR UPDATE;", table_name);
|
||||
let query = sqlx::query(&raw_query);
|
||||
let _ = executor.query(query, &raw_query).await.unwrap();
|
||||
election.elected(executor, expected_lease).await
|
||||
@@ -1280,7 +1280,7 @@ mod tests {
|
||||
let execution_timeout = Duration::from_secs(10);
|
||||
let idle_session_timeout = Duration::from_secs(0);
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_elected_failed_greptime_metakv";
|
||||
let table_name = "test_elected_failed_greptime-metakv";
|
||||
let client = create_mysql_client(Some(table_name), execution_timeout, idle_session_timeout)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1313,7 +1313,7 @@ mod tests {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let leader_value = "test_leader".to_string();
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_reelection_greptime_metakv";
|
||||
let table_name = "test_reelection_greptime-metakv";
|
||||
let candidate_lease_ttl = Duration::from_secs(5);
|
||||
let meta_lease_ttl = Duration::from_secs(5);
|
||||
let execution_timeout = Duration::from_secs(10);
|
||||
@@ -1381,7 +1381,7 @@ mod tests {
|
||||
let execution_timeout = Duration::from_secs(10);
|
||||
let idle_session_timeout = Duration::from_secs(0);
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_elected_and_step_down_greptime_metakv";
|
||||
let table_name = "test_elected_and_step_down_greptime-metakv";
|
||||
let client = create_mysql_client(Some(table_name), execution_timeout, idle_session_timeout)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1470,7 +1470,7 @@ mod tests {
|
||||
maybe_skip_mysql_integration_test!();
|
||||
let leader_value = "test_leader".to_string();
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_leader_action_greptime_metakv";
|
||||
let table_name = "test_leader_action_greptime-metakv";
|
||||
let candidate_lease_ttl = Duration::from_secs(5);
|
||||
let meta_lease_ttl = Duration::from_secs(2);
|
||||
let execution_timeout = Duration::from_secs(10);
|
||||
@@ -1657,7 +1657,7 @@ mod tests {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let leader_value = "test_leader".to_string();
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_reset_campaign_greptime_metakv";
|
||||
let table_name = "test_reset_campaign_greptime-metakv";
|
||||
let candidate_lease_ttl = Duration::from_secs(5);
|
||||
let meta_lease_ttl = Duration::from_secs(2);
|
||||
let execution_timeout = Duration::from_secs(10);
|
||||
@@ -1695,7 +1695,7 @@ mod tests {
|
||||
let execution_timeout = Duration::from_secs(10);
|
||||
let idle_session_timeout = Duration::from_secs(0);
|
||||
let uuid = uuid::Uuid::new_v4().to_string();
|
||||
let table_name = "test_follower_action_greptime_metakv";
|
||||
let table_name = "test_follower_action_greptime-metakv";
|
||||
|
||||
let follower_client =
|
||||
create_mysql_client(Some(table_name), execution_timeout, idle_session_timeout)
|
||||
|
||||
@@ -110,11 +110,11 @@ pub enum Error {
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Another procedure is opening the region: {} on peer: {}",
|
||||
"Another procedure is operating the region: {} on peer: {}",
|
||||
region_id,
|
||||
peer_id
|
||||
))]
|
||||
RegionOpeningRace {
|
||||
RegionOperatingRace {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
peer_id: DatanodeId,
|
||||
@@ -936,8 +936,8 @@ pub enum Error {
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build wal options allocator"))]
|
||||
BuildWalOptionsAllocator {
|
||||
#[snafu(display("Failed to build wal provider"))]
|
||||
BuildWalProvider {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
@@ -1038,7 +1038,7 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serialize partition expression: {}", source))]
|
||||
#[snafu(display("Failed to serialize partition expression"))]
|
||||
SerializePartitionExpr {
|
||||
#[snafu(source)]
|
||||
source: partition::error::Error,
|
||||
@@ -1046,6 +1046,20 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to deserialize partition expression"))]
|
||||
DeserializePartitionExpr {
|
||||
#[snafu(source)]
|
||||
source: partition::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Empty partition expression"))]
|
||||
EmptyPartitionExpr {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Partition expression mismatch, region id: {}, expected: {}, actual: {}",
|
||||
region_id,
|
||||
@@ -1059,6 +1073,51 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to allocate regions for table: {}", table_id))]
|
||||
AllocateRegions {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_id: TableId,
|
||||
#[snafu(source)]
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to deallocate regions for table: {}", table_id))]
|
||||
DeallocateRegions {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_id: TableId,
|
||||
#[snafu(source)]
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build create request for table: {}", table_id))]
|
||||
BuildCreateRequest {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_id: TableId,
|
||||
#[snafu(source)]
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to allocate region routes for table: {}", table_id))]
|
||||
AllocateRegionRoutes {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_id: TableId,
|
||||
#[snafu(source)]
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to allocate wal options for table: {}", table_id))]
|
||||
AllocateWalOptions {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_id: TableId,
|
||||
#[snafu(source)]
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl Error {
|
||||
@@ -1104,7 +1163,7 @@ impl ErrorExt for Error {
|
||||
| Error::Join { .. }
|
||||
| Error::ChooseItems { .. }
|
||||
| Error::FlowStateHandler { .. }
|
||||
| Error::BuildWalOptionsAllocator { .. }
|
||||
| Error::BuildWalProvider { .. }
|
||||
| Error::BuildPartitionClient { .. }
|
||||
| Error::BuildKafkaClient { .. } => StatusCode::Internal,
|
||||
|
||||
@@ -1118,7 +1177,8 @@ impl ErrorExt for Error {
|
||||
| Error::MailboxChannelClosed { .. }
|
||||
| Error::IsNotLeader { .. } => StatusCode::IllegalState,
|
||||
Error::RetryLaterWithSource { source, .. } => source.status_code(),
|
||||
Error::SerializePartitionExpr { source, .. } => source.status_code(),
|
||||
Error::SerializePartitionExpr { source, .. }
|
||||
| Error::DeserializePartitionExpr { source, .. } => source.status_code(),
|
||||
|
||||
Error::Unsupported { .. } => StatusCode::Unsupported,
|
||||
|
||||
@@ -1144,7 +1204,8 @@ impl ErrorExt for Error {
|
||||
| Error::RepartitionSourceRegionMissing { .. }
|
||||
| Error::RepartitionTargetRegionMissing { .. }
|
||||
| Error::PartitionExprMismatch { .. }
|
||||
| Error::RepartitionSourceExprMismatch { .. } => StatusCode::InvalidArguments,
|
||||
| Error::RepartitionSourceExprMismatch { .. }
|
||||
| Error::EmptyPartitionExpr { .. } => StatusCode::InvalidArguments,
|
||||
Error::LeaseKeyFromUtf8 { .. }
|
||||
| Error::LeaseValueFromUtf8 { .. }
|
||||
| Error::InvalidRegionKeyFromUtf8 { .. }
|
||||
@@ -1154,7 +1215,7 @@ impl ErrorExt for Error {
|
||||
| Error::InvalidUtf8Value { .. }
|
||||
| Error::UnexpectedInstructionReply { .. }
|
||||
| Error::Unexpected { .. }
|
||||
| Error::RegionOpeningRace { .. }
|
||||
| Error::RegionOperatingRace { .. }
|
||||
| Error::RegionRouteNotFound { .. }
|
||||
| Error::MigrationAbort { .. }
|
||||
| Error::MigrationRunning { .. }
|
||||
@@ -1206,6 +1267,11 @@ impl ErrorExt for Error {
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
Error::RepartitionCreateSubtasks { source, .. } => source.status_code(),
|
||||
Error::RepartitionSubprocedureStateReceiver { source, .. } => source.status_code(),
|
||||
Error::AllocateRegions { source, .. } => source.status_code(),
|
||||
Error::DeallocateRegions { source, .. } => source.status_code(),
|
||||
Error::AllocateRegionRoutes { source, .. } => source.status_code(),
|
||||
Error::AllocateWalOptions { source, .. } => source.status_code(),
|
||||
Error::BuildCreateRequest { source, .. } => source.status_code(),
|
||||
Error::NoEnoughAvailableNode { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
|
||||
@@ -12,9 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// TODO(discord9): remove this once gc scheduler is fully merged
|
||||
#![allow(unused)]
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
@@ -29,6 +26,7 @@ mod options;
|
||||
mod procedure;
|
||||
mod scheduler;
|
||||
mod tracker;
|
||||
mod util;
|
||||
|
||||
pub use options::GcSchedulerOptions;
|
||||
pub use procedure::BatchGcProcedure;
|
||||
|
||||
@@ -12,29 +12,22 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::instruction::{
|
||||
GcRegions, GetFileRefs, GetFileRefsReply, Instruction, InstructionReply,
|
||||
};
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::key::table_route::PhysicalTableRouteValue;
|
||||
use common_meta::peer::Peer;
|
||||
use common_procedure::{ProcedureManagerRef, ProcedureWithId, watcher};
|
||||
use common_telemetry::{debug, error, warn};
|
||||
use common_telemetry::debug;
|
||||
use snafu::{OptionExt as _, ResultExt as _};
|
||||
use store_api::storage::{FileId, FileRefsManifest, GcReport, RegionId};
|
||||
use store_api::storage::{GcReport, RegionId};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::cluster::MetaPeerClientRef;
|
||||
use crate::error::{self, Result, TableMetadataManagerSnafu, UnexpectedSnafu};
|
||||
use crate::gc::Region2Peers;
|
||||
use crate::gc::procedure::{BatchGcProcedure, GcRegionProcedure};
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::service::mailbox::{Channel, MailboxRef};
|
||||
use crate::error::{self, Result, TableMetadataManagerSnafu};
|
||||
use crate::gc::procedure::BatchGcProcedure;
|
||||
use crate::service::mailbox::MailboxRef;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub(crate) trait SchedulerCtx: Send + Sync {
|
||||
@@ -45,19 +38,9 @@ pub(crate) trait SchedulerCtx: Send + Sync {
|
||||
table_id: TableId,
|
||||
) -> Result<(TableId, PhysicalTableRouteValue)>;
|
||||
|
||||
async fn get_file_references(
|
||||
&self,
|
||||
query_regions: &[RegionId],
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
region_routes: &Region2Peers,
|
||||
timeout: Duration,
|
||||
) -> Result<FileRefsManifest>;
|
||||
|
||||
async fn gc_regions(
|
||||
&self,
|
||||
peer: Peer,
|
||||
region_ids: &[RegionId],
|
||||
file_refs_manifest: &FileRefsManifest,
|
||||
full_file_listing: bool,
|
||||
timeout: Duration,
|
||||
) -> Result<GcReport>;
|
||||
@@ -100,7 +83,7 @@ impl SchedulerCtx for DefaultGcSchedulerCtx {
|
||||
let dn_stats = self.meta_peer_client.get_all_dn_stat_kvs().await?;
|
||||
let mut table_to_region_stats: HashMap<TableId, Vec<RegionStat>> = HashMap::new();
|
||||
for (_dn_id, stats) in dn_stats {
|
||||
let mut stats = stats.stats;
|
||||
let stats = stats.stats;
|
||||
|
||||
let Some(latest_stat) = stats.iter().max_by_key(|s| s.timestamp_millis).cloned() else {
|
||||
continue;
|
||||
@@ -129,142 +112,34 @@ impl SchedulerCtx for DefaultGcSchedulerCtx {
|
||||
|
||||
async fn gc_regions(
|
||||
&self,
|
||||
peer: Peer,
|
||||
region_ids: &[RegionId],
|
||||
file_refs_manifest: &FileRefsManifest,
|
||||
full_file_listing: bool,
|
||||
timeout: Duration,
|
||||
) -> Result<GcReport> {
|
||||
self.gc_regions_inner(
|
||||
peer,
|
||||
region_ids,
|
||||
file_refs_manifest,
|
||||
full_file_listing,
|
||||
timeout,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn get_file_references(
|
||||
&self,
|
||||
query_regions: &[RegionId],
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
region_routes: &Region2Peers,
|
||||
timeout: Duration,
|
||||
) -> Result<FileRefsManifest> {
|
||||
debug!(
|
||||
"Getting file references for {} regions",
|
||||
query_regions.len()
|
||||
);
|
||||
|
||||
// Group regions by datanode to minimize RPC calls
|
||||
let mut datanode2query_regions: HashMap<Peer, Vec<RegionId>> = HashMap::new();
|
||||
|
||||
for region_id in query_regions {
|
||||
if let Some((leader, followers)) = region_routes.get(region_id) {
|
||||
datanode2query_regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.push(*region_id);
|
||||
// also need to send for follower regions for file refs in case query is running on follower
|
||||
for follower in followers {
|
||||
datanode2query_regions
|
||||
.entry(follower.clone())
|
||||
.or_default()
|
||||
.push(*region_id);
|
||||
}
|
||||
} else {
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"region_routes: {region_routes:?} does not contain region_id: {region_id}",
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
let mut datanode2related_regions: HashMap<Peer, HashMap<RegionId, Vec<RegionId>>> =
|
||||
HashMap::new();
|
||||
for (related_region, queries) in related_regions {
|
||||
if let Some((leader, followers)) = region_routes.get(&related_region) {
|
||||
datanode2related_regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.insert(related_region, queries.clone());
|
||||
} // since read from manifest, no need to send to followers
|
||||
}
|
||||
|
||||
// Send GetFileRefs instructions to each datanode
|
||||
let mut all_file_refs: HashMap<RegionId, HashSet<_>> = HashMap::new();
|
||||
let mut all_manifest_versions = HashMap::new();
|
||||
|
||||
for (peer, regions) in datanode2query_regions {
|
||||
let related_regions = datanode2related_regions.remove(&peer).unwrap_or_default();
|
||||
match self
|
||||
.send_get_file_refs_instruction(&peer, ®ions, related_regions, timeout)
|
||||
.await
|
||||
{
|
||||
Ok(manifest) => {
|
||||
// TODO(discord9): if other regions provide file refs for one region on other datanode, and no version,
|
||||
// is it correct to merge manifest_version directly?
|
||||
// FIXME: follower region how to merge version???
|
||||
|
||||
for (region_id, file_refs) in manifest.file_refs {
|
||||
all_file_refs
|
||||
.entry(region_id)
|
||||
.or_default()
|
||||
.extend(file_refs);
|
||||
}
|
||||
// region manifest version should be the smallest one among all peers, so outdated region can be detected
|
||||
for (region_id, version) in manifest.manifest_version {
|
||||
let entry = all_manifest_versions.entry(region_id).or_insert(version);
|
||||
*entry = (*entry).min(version);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to get file refs from datanode {}: {}. Skipping regions on this datanode.",
|
||||
peer, e
|
||||
);
|
||||
// Continue processing other datanodes instead of failing the entire operation
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FileRefsManifest {
|
||||
file_refs: all_file_refs,
|
||||
manifest_version: all_manifest_versions,
|
||||
})
|
||||
self.gc_regions_inner(region_ids, full_file_listing, timeout)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl DefaultGcSchedulerCtx {
|
||||
async fn gc_regions_inner(
|
||||
&self,
|
||||
peer: Peer,
|
||||
region_ids: &[RegionId],
|
||||
file_refs_manifest: &FileRefsManifest,
|
||||
full_file_listing: bool,
|
||||
timeout: Duration,
|
||||
) -> Result<GcReport> {
|
||||
debug!(
|
||||
"Sending GC instruction to datanode {} for {} regions (full_file_listing: {})",
|
||||
peer,
|
||||
"Sending GC instruction for {} regions (full_file_listing: {})",
|
||||
region_ids.len(),
|
||||
full_file_listing
|
||||
);
|
||||
|
||||
let gc_regions = GcRegions {
|
||||
regions: region_ids.to_vec(),
|
||||
file_refs_manifest: file_refs_manifest.clone(),
|
||||
full_file_listing,
|
||||
};
|
||||
let procedure = GcRegionProcedure::new(
|
||||
let procedure = BatchGcProcedure::new(
|
||||
self.mailbox.clone(),
|
||||
self.table_metadata_manager.clone(),
|
||||
self.server_addr.clone(),
|
||||
peer,
|
||||
gc_regions,
|
||||
format!("GC for {} regions", region_ids.len()),
|
||||
region_ids.to_vec(),
|
||||
full_file_listing,
|
||||
timeout,
|
||||
);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
@@ -285,96 +160,8 @@ impl DefaultGcSchedulerCtx {
|
||||
),
|
||||
})?;
|
||||
|
||||
let gc_report = GcRegionProcedure::cast_result(res)?;
|
||||
let gc_report = BatchGcProcedure::cast_result(res)?;
|
||||
|
||||
Ok(gc_report)
|
||||
}
|
||||
|
||||
/// TODO(discord9): add support to read manifest of related regions for file refs too
|
||||
/// (now it's only reading active FileHandles)
|
||||
async fn send_get_file_refs_instruction(
|
||||
&self,
|
||||
peer: &Peer,
|
||||
query_regions: &[RegionId],
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
timeout: Duration,
|
||||
) -> Result<FileRefsManifest> {
|
||||
debug!(
|
||||
"Sending GetFileRefs instruction to datanode {} for {} regions",
|
||||
peer,
|
||||
query_regions.len()
|
||||
);
|
||||
|
||||
let instruction = Instruction::GetFileRefs(GetFileRefs {
|
||||
query_regions: query_regions.to_vec(),
|
||||
related_regions,
|
||||
});
|
||||
|
||||
let reply = self
|
||||
.send_instruction(peer, instruction, "Get file references", timeout)
|
||||
.await?;
|
||||
|
||||
let InstructionReply::GetFileRefs(GetFileRefsReply {
|
||||
file_refs_manifest,
|
||||
success,
|
||||
error,
|
||||
}) = reply
|
||||
else {
|
||||
return error::UnexpectedInstructionReplySnafu {
|
||||
mailbox_message: format!("{:?}", reply),
|
||||
reason: "Unexpected reply of the GetFileRefs instruction",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
if !success {
|
||||
return error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Failed to get file references from datanode {}: {:?}",
|
||||
peer, error
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
Ok(file_refs_manifest)
|
||||
}
|
||||
|
||||
async fn send_instruction(
|
||||
&self,
|
||||
peer: &Peer,
|
||||
instruction: Instruction,
|
||||
description: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<InstructionReply> {
|
||||
let msg = MailboxMessage::json_message(
|
||||
&format!("{}: {}", description, instruction),
|
||||
&format!("Metasrv@{}", self.server_addr),
|
||||
&format!("Datanode-{}@{}", peer.id, peer.addr),
|
||||
common_time::util::current_time_millis(),
|
||||
&instruction,
|
||||
)
|
||||
.with_context(|_| error::SerializeToJsonSnafu {
|
||||
input: instruction.to_string(),
|
||||
})?;
|
||||
|
||||
let mailbox_rx = self
|
||||
.mailbox
|
||||
.send(&Channel::Datanode(peer.id), msg, timeout)
|
||||
.await?;
|
||||
|
||||
match mailbox_rx.await {
|
||||
Ok(reply_msg) => {
|
||||
let reply = HeartbeatMailbox::json_reply(&reply_msg)?;
|
||||
Ok(reply)
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to receive reply from datanode {} for {}: {}",
|
||||
peer, description, e
|
||||
);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,24 +15,17 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::time::Instant;
|
||||
|
||||
use common_meta::key::table_route::PhysicalTableRouteValue;
|
||||
use common_meta::peer::Peer;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
|
||||
use store_api::storage::{GcReport, RegionId};
|
||||
use table::metadata::TableId;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::gc::candidate::GcCandidate;
|
||||
use crate::gc::scheduler::{GcJobReport, GcScheduler};
|
||||
use crate::gc::tracker::RegionGcInfo;
|
||||
use crate::region;
|
||||
|
||||
pub(crate) type Region2Peers = HashMap<RegionId, (Peer, Vec<Peer>)>;
|
||||
|
||||
pub(crate) type Peer2Regions = HashMap<Peer, HashSet<RegionId>>;
|
||||
|
||||
impl GcScheduler {
|
||||
/// Iterate through all region stats, find region that might need gc, and send gc instruction to
|
||||
@@ -61,6 +54,8 @@ impl GcScheduler {
|
||||
.aggregate_candidates_by_datanode(per_table_candidates)
|
||||
.await?;
|
||||
|
||||
// TODO(discord9): add deleted regions from repartition mapping
|
||||
|
||||
if datanode_to_candidates.is_empty() {
|
||||
info!("No valid datanode candidates found, skipping GC cycle");
|
||||
return Ok(Default::default());
|
||||
@@ -83,17 +78,6 @@ impl GcScheduler {
|
||||
Ok(report)
|
||||
}
|
||||
|
||||
/// Find related regions that might share files with the candidate regions.
|
||||
/// Currently returns the same regions since repartition is not implemented yet.
|
||||
/// TODO(discord9): When repartition is implemented, this should also find src/dst regions
|
||||
/// that might share files with the candidate regions.
|
||||
pub(crate) async fn find_related_regions(
|
||||
&self,
|
||||
candidate_region_ids: &[RegionId],
|
||||
) -> Result<HashMap<RegionId, Vec<RegionId>>> {
|
||||
Ok(candidate_region_ids.iter().map(|&r| (r, vec![r])).collect())
|
||||
}
|
||||
|
||||
/// Aggregate GC candidates by their corresponding datanode peer.
|
||||
pub(crate) async fn aggregate_candidates_by_datanode(
|
||||
&self,
|
||||
@@ -210,28 +194,11 @@ impl GcScheduler {
|
||||
|
||||
let all_region_ids: Vec<RegionId> = candidates.iter().map(|(_, c)| c.region_id).collect();
|
||||
|
||||
let all_related_regions = self.find_related_regions(&all_region_ids).await?;
|
||||
|
||||
let (region_to_peer, _) = self
|
||||
.discover_datanodes_for_regions(&all_related_regions.keys().cloned().collect_vec())
|
||||
.await?;
|
||||
|
||||
// Step 1: Get file references for all regions on this datanode
|
||||
let file_refs_manifest = self
|
||||
.ctx
|
||||
.get_file_references(
|
||||
&all_region_ids,
|
||||
all_related_regions,
|
||||
&region_to_peer,
|
||||
self.config.mailbox_timeout,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Step 2: Create a single GcRegionProcedure for all regions on this datanode
|
||||
// Step 2: Run GC for all regions on this datanode in a single batch
|
||||
let (gc_report, fully_listed_regions) = {
|
||||
// Partition regions into full listing and fast listing in a single pass
|
||||
|
||||
let mut batch_full_listing_decisions =
|
||||
let batch_full_listing_decisions =
|
||||
self.batch_should_use_full_listing(&all_region_ids).await;
|
||||
|
||||
let need_full_list_regions = batch_full_listing_decisions
|
||||
@@ -242,7 +209,7 @@ impl GcScheduler {
|
||||
},
|
||||
)
|
||||
.collect_vec();
|
||||
let mut fast_list_regions = batch_full_listing_decisions
|
||||
let fast_list_regions = batch_full_listing_decisions
|
||||
.iter()
|
||||
.filter_map(
|
||||
|(&region_id, &need_full)| {
|
||||
@@ -257,13 +224,7 @@ impl GcScheduler {
|
||||
if !fast_list_regions.is_empty() {
|
||||
match self
|
||||
.ctx
|
||||
.gc_regions(
|
||||
peer.clone(),
|
||||
&fast_list_regions,
|
||||
&file_refs_manifest,
|
||||
false,
|
||||
self.config.mailbox_timeout,
|
||||
)
|
||||
.gc_regions(&fast_list_regions, false, self.config.mailbox_timeout)
|
||||
.await
|
||||
{
|
||||
Ok(report) => combined_report.merge(report),
|
||||
@@ -284,13 +245,7 @@ impl GcScheduler {
|
||||
if !need_full_list_regions.is_empty() {
|
||||
match self
|
||||
.ctx
|
||||
.gc_regions(
|
||||
peer.clone(),
|
||||
&need_full_list_regions,
|
||||
&file_refs_manifest,
|
||||
true,
|
||||
self.config.mailbox_timeout,
|
||||
)
|
||||
.gc_regions(&need_full_list_regions, true, self.config.mailbox_timeout)
|
||||
.await
|
||||
{
|
||||
Ok(report) => combined_report.merge(report),
|
||||
@@ -330,98 +285,6 @@ impl GcScheduler {
|
||||
Ok(gc_report)
|
||||
}
|
||||
|
||||
/// Discover datanodes for the given regions (and their related regions) by fetching table routes in batches.
|
||||
/// Returns mappings from region to peer(leader, Vec<followers>) and peer to regions.
|
||||
async fn discover_datanodes_for_regions(
|
||||
&self,
|
||||
regions: &[RegionId],
|
||||
) -> Result<(Region2Peers, Peer2Regions)> {
|
||||
let all_related_regions = self
|
||||
.find_related_regions(regions)
|
||||
.await?
|
||||
.into_iter()
|
||||
.flat_map(|(k, mut v)| {
|
||||
v.push(k);
|
||||
v
|
||||
})
|
||||
.collect_vec();
|
||||
let mut region_to_peer = HashMap::new();
|
||||
let mut peer_to_regions = HashMap::new();
|
||||
|
||||
// Group regions by table ID for batch processing
|
||||
let mut table_to_regions: HashMap<TableId, Vec<RegionId>> = HashMap::new();
|
||||
for region_id in all_related_regions {
|
||||
let table_id = region_id.table_id();
|
||||
table_to_regions
|
||||
.entry(table_id)
|
||||
.or_default()
|
||||
.push(region_id);
|
||||
}
|
||||
|
||||
// Process each table's regions together for efficiency
|
||||
for (table_id, table_regions) in table_to_regions {
|
||||
match self.ctx.get_table_route(table_id).await {
|
||||
Ok((_phy_table_id, table_route)) => {
|
||||
self.get_table_regions_peer(
|
||||
&table_route,
|
||||
&table_regions,
|
||||
&mut region_to_peer,
|
||||
&mut peer_to_regions,
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
// Continue with other tables instead of failing completely
|
||||
// TODO(discord9): consider failing here instead
|
||||
warn!(
|
||||
"Failed to get table route for table {}: {}, skipping its regions",
|
||||
table_id, e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((region_to_peer, peer_to_regions))
|
||||
}
|
||||
|
||||
/// Process regions for a single table to find their current leader peers.
|
||||
fn get_table_regions_peer(
|
||||
&self,
|
||||
table_route: &PhysicalTableRouteValue,
|
||||
table_regions: &[RegionId],
|
||||
region_to_peer: &mut Region2Peers,
|
||||
peer_to_regions: &mut Peer2Regions,
|
||||
) {
|
||||
for &region_id in table_regions {
|
||||
let mut found = false;
|
||||
|
||||
// Find the region in the table route
|
||||
for region_route in &table_route.region_routes {
|
||||
if region_route.region.id == region_id
|
||||
&& let Some(leader_peer) = &region_route.leader_peer
|
||||
{
|
||||
region_to_peer.insert(
|
||||
region_id,
|
||||
(leader_peer.clone(), region_route.follower_peers.clone()),
|
||||
);
|
||||
peer_to_regions
|
||||
.entry(leader_peer.clone())
|
||||
.or_default()
|
||||
.insert(region_id);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
warn!(
|
||||
"Failed to find region {} in table route or no leader peer found",
|
||||
region_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn batch_should_use_full_listing(
|
||||
&self,
|
||||
region_ids: &[RegionId],
|
||||
|
||||
@@ -36,10 +36,9 @@ use store_api::storage::{FileRefsManifest, GcReport, RegionId};
|
||||
use table::metadata::TableId;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
|
||||
use crate::error::{Result, UnexpectedSnafu};
|
||||
use crate::error::Result;
|
||||
use crate::gc::candidate::GcCandidate;
|
||||
use crate::gc::ctx::SchedulerCtx;
|
||||
use crate::gc::handler::Region2Peers;
|
||||
use crate::gc::options::GcSchedulerOptions;
|
||||
use crate::gc::scheduler::{Event, GcScheduler};
|
||||
|
||||
@@ -67,12 +66,10 @@ pub struct MockSchedulerCtx {
|
||||
pub gc_reports: Arc<Mutex<HashMap<RegionId, GcReport>>>,
|
||||
pub candidates: Arc<Mutex<Option<HashMap<TableId, Vec<GcCandidate>>>>>,
|
||||
pub get_table_to_region_stats_calls: Arc<Mutex<usize>>,
|
||||
pub get_file_references_calls: Arc<Mutex<usize>>,
|
||||
pub gc_regions_calls: Arc<Mutex<usize>>,
|
||||
// Error injection fields for testing
|
||||
pub get_table_to_region_stats_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
pub get_table_route_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
pub get_file_references_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
pub gc_regions_error: Arc<Mutex<Option<crate::error::Error>>>,
|
||||
// Retry testing fields
|
||||
pub gc_regions_retry_count: Arc<Mutex<HashMap<RegionId, usize>>>,
|
||||
@@ -119,57 +116,12 @@ impl MockSchedulerCtx {
|
||||
*self.get_table_route_error.lock().unwrap() = Some(error);
|
||||
}
|
||||
|
||||
/// Set an error to be returned by `get_file_references`
|
||||
#[allow(dead_code)]
|
||||
pub fn with_get_file_references_error(self, error: crate::error::Error) -> Self {
|
||||
*self.get_file_references_error.lock().unwrap() = Some(error);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an error to be returned by `gc_regions`
|
||||
pub fn with_gc_regions_error(self, error: crate::error::Error) -> Self {
|
||||
*self.gc_regions_error.lock().unwrap() = Some(error);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a sequence of errors to be returned by `gc_regions` for retry testing
|
||||
pub fn set_gc_regions_error_sequence(&self, errors: Vec<crate::error::Error>) {
|
||||
*self.gc_regions_error_sequence.lock().unwrap() = errors;
|
||||
}
|
||||
|
||||
/// Set success after a specific number of retries for a region
|
||||
pub fn set_gc_regions_success_after_retries(&self, region_id: RegionId, retries: usize) {
|
||||
self.gc_regions_success_after_retries
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(region_id, retries);
|
||||
}
|
||||
|
||||
/// Get the retry count for a specific region
|
||||
pub fn get_retry_count(&self, region_id: RegionId) -> usize {
|
||||
self.gc_regions_retry_count
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get(&region_id)
|
||||
.copied()
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Reset all retry tracking
|
||||
pub fn reset_retry_tracking(&self) {
|
||||
*self.gc_regions_retry_count.lock().unwrap() = HashMap::new();
|
||||
*self.gc_regions_error_sequence.lock().unwrap() = Vec::new();
|
||||
*self.gc_regions_success_after_retries.lock().unwrap() = HashMap::new();
|
||||
}
|
||||
|
||||
/// Set an error to be returned for a specific region
|
||||
pub fn set_gc_regions_error_for_region(&self, region_id: RegionId, error: crate::error::Error) {
|
||||
self.gc_regions_per_region_errors
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(region_id, error);
|
||||
}
|
||||
|
||||
/// Clear per-region errors
|
||||
#[allow(unused)]
|
||||
pub fn clear_gc_regions_per_region_errors(&self) {
|
||||
@@ -213,39 +165,9 @@ impl SchedulerCtx for MockSchedulerCtx {
|
||||
.unwrap_or_else(|| (table_id, PhysicalTableRouteValue::default())))
|
||||
}
|
||||
|
||||
async fn get_file_references(
|
||||
&self,
|
||||
query_regions: &[RegionId],
|
||||
_related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
region_to_peer: &Region2Peers,
|
||||
_timeout: Duration,
|
||||
) -> Result<FileRefsManifest> {
|
||||
*self.get_file_references_calls.lock().unwrap() += 1;
|
||||
|
||||
// Check if we should return an injected error
|
||||
if let Some(error) = self.get_file_references_error.lock().unwrap().take() {
|
||||
return Err(error);
|
||||
}
|
||||
if query_regions
|
||||
.iter()
|
||||
.any(|region_id| !region_to_peer.contains_key(region_id))
|
||||
{
|
||||
UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"region_to_peer{region_to_peer:?} does not contain all region_ids requested: {:?}",
|
||||
query_regions
|
||||
),
|
||||
}.fail()?;
|
||||
}
|
||||
|
||||
Ok(self.file_refs.lock().unwrap().clone().unwrap_or_default())
|
||||
}
|
||||
|
||||
async fn gc_regions(
|
||||
&self,
|
||||
_peer: Peer,
|
||||
region_ids: &[RegionId],
|
||||
_file_refs_manifest: &FileRefsManifest,
|
||||
_full_file_listing: bool,
|
||||
_timeout: Duration,
|
||||
) -> Result<GcReport> {
|
||||
|
||||
@@ -152,7 +152,6 @@ async fn test_handle_tick() {
|
||||
);
|
||||
|
||||
assert_eq!(*ctx.get_table_to_region_stats_calls.lock().unwrap(), 1);
|
||||
assert_eq!(*ctx.get_file_references_calls.lock().unwrap(), 1);
|
||||
assert_eq!(*ctx.gc_regions_calls.lock().unwrap(), 1);
|
||||
|
||||
let tracker = scheduler.region_gc_tracker.lock().await;
|
||||
|
||||
@@ -64,6 +64,7 @@ async fn test_gc_regions_failure_handling() {
|
||||
region_id,
|
||||
HashSet::from([FileRef::new(region_id, FileId::random(), None)]),
|
||||
)]),
|
||||
cross_region_refs: HashMap::new(),
|
||||
};
|
||||
|
||||
let ctx = Arc::new(
|
||||
@@ -121,10 +122,6 @@ async fn test_gc_regions_failure_handling() {
|
||||
1,
|
||||
"Expected 1 call to get_table_to_region_stats"
|
||||
);
|
||||
assert!(
|
||||
*ctx.get_file_references_calls.lock().unwrap() >= 1,
|
||||
"Expected at least 1 call to get_file_references"
|
||||
);
|
||||
assert!(
|
||||
*ctx.gc_regions_calls.lock().unwrap() >= 1,
|
||||
"Expected at least 1 call to gc_regions"
|
||||
@@ -206,13 +203,6 @@ async fn test_get_file_references_failure() {
|
||||
datanode_report.deleted_files[&region_id].is_empty(),
|
||||
"Should have empty deleted files due to file refs failure"
|
||||
);
|
||||
|
||||
// Should still attempt to get file references (may be called multiple times due to retry logic)
|
||||
assert!(
|
||||
*ctx.get_file_references_calls.lock().unwrap() >= 1,
|
||||
"Expected at least 1 call to get_file_references, got {}",
|
||||
*ctx.get_file_references_calls.lock().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -255,42 +245,22 @@ async fn test_get_table_route_failure() {
|
||||
last_tracker_cleanup: Arc::new(tokio::sync::Mutex::new(Instant::now())),
|
||||
};
|
||||
|
||||
// Get candidates first
|
||||
let stats = &ctx
|
||||
.table_to_region_stats
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
let candidates = scheduler.select_gc_candidates(stats).await.unwrap();
|
||||
// Test the full workflow to trigger table route failure during aggregation
|
||||
// The table route failure should cause the entire GC cycle to fail
|
||||
let result = scheduler.handle_tick().await;
|
||||
|
||||
// Convert table-based candidates to datanode-based candidates
|
||||
let datanode_to_candidates = HashMap::from([(
|
||||
Peer::new(1, ""),
|
||||
candidates
|
||||
.into_iter()
|
||||
.flat_map(|(table_id, candidates)| candidates.into_iter().map(move |c| (table_id, c)))
|
||||
.collect(),
|
||||
)]);
|
||||
|
||||
// This should handle table route failure gracefully
|
||||
let report = scheduler
|
||||
.parallel_process_datanodes(datanode_to_candidates)
|
||||
.await;
|
||||
|
||||
// Should process the datanode but handle route error gracefully
|
||||
assert_eq!(
|
||||
report.per_datanode_reports.len(),
|
||||
0,
|
||||
"Expected 0 datanode report"
|
||||
);
|
||||
assert_eq!(
|
||||
report.failed_datanodes.len(),
|
||||
1,
|
||||
"Expected 1 failed datanodes (route error handled gracefully)"
|
||||
);
|
||||
// The table route failure should be propagated as an error
|
||||
assert!(
|
||||
report.failed_datanodes.contains_key(&1),
|
||||
"Failed datanodes should contain the datanode with route error"
|
||||
result.is_err(),
|
||||
"Expected table route failure to propagate as error"
|
||||
);
|
||||
|
||||
// Verify the error message contains our simulated failure
|
||||
let error = result.unwrap_err();
|
||||
let error_msg = format!("{}", error);
|
||||
assert!(
|
||||
error_msg.contains("Simulated table route failure for testing"),
|
||||
"Error message should contain our simulated failure: {}",
|
||||
error_msg
|
||||
);
|
||||
}
|
||||
|
||||
@@ -123,11 +123,6 @@ async fn test_full_gc_workflow() {
|
||||
1,
|
||||
"Expected 1 call to get_table_to_region_stats"
|
||||
);
|
||||
assert_eq!(
|
||||
*ctx.get_file_references_calls.lock().unwrap(),
|
||||
1,
|
||||
"Expected 1 call to get_file_references"
|
||||
);
|
||||
assert_eq!(
|
||||
*ctx.gc_regions_calls.lock().unwrap(),
|
||||
1,
|
||||
|
||||
@@ -73,11 +73,11 @@ impl Default for GcSchedulerOptions {
|
||||
retry_backoff_duration: Duration::from_secs(5),
|
||||
region_gc_concurrency: 16,
|
||||
min_region_size_threshold: 100 * 1024 * 1024, // 100MB
|
||||
sst_count_weight: 1.0,
|
||||
file_removed_count_weight: 0.5,
|
||||
sst_count_weight: 0.5, // more sst means could potentially remove more files, moderate priority
|
||||
file_removed_count_weight: 1.0, // more file to be deleted, higher priority
|
||||
gc_cooldown_period: Duration::from_secs(60 * 5), // 5 minutes
|
||||
regions_per_table_threshold: 20, // Select top 20 regions per table
|
||||
mailbox_timeout: Duration::from_secs(60), // 60 seconds
|
||||
regions_per_table_threshold: 20, // Select top 20 regions per table
|
||||
mailbox_timeout: Duration::from_secs(60), // 60 seconds
|
||||
// Perform full file listing every 24 hours to find orphan files
|
||||
full_file_listing_interval: Duration::from_secs(60 * 60 * 24),
|
||||
// Clean up stale tracker entries every 6 hours
|
||||
|
||||
@@ -19,6 +19,8 @@ use std::time::Duration;
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use common_meta::instruction::{self, GcRegions, GetFileRefs, GetFileRefsReply, InstructionReply};
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::key::table_route::PhysicalTableRouteValue;
|
||||
use common_meta::lock_key::RegionLock;
|
||||
use common_meta::peer::Peer;
|
||||
use common_procedure::error::ToJsonSnafu;
|
||||
@@ -26,14 +28,16 @@ use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
|
||||
Result as ProcedureResult, Status,
|
||||
};
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_telemetry::{error, info, warn};
|
||||
use itertools::Itertools as _;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt as _;
|
||||
use store_api::storage::{FileRefsManifest, GcReport, RegionId};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::error::{self, Result, SerializeToJsonSnafu};
|
||||
use crate::gc::Region2Peers;
|
||||
use crate::error::{self, KvBackendSnafu, Result, SerializeToJsonSnafu, TableMetadataManagerSnafu};
|
||||
use crate::gc::util::table_route_to_region;
|
||||
use crate::gc::{Peer2Regions, Region2Peers};
|
||||
use crate::handler::HeartbeatMailbox;
|
||||
use crate::service::mailbox::{Channel, MailboxRef};
|
||||
|
||||
@@ -146,56 +150,73 @@ async fn send_gc_regions(
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO(discord9): another procedure which do both get file refs and gc regions.
|
||||
pub struct GcRegionProcedure {
|
||||
/// Procedure to perform get file refs then batch GC for multiple regions,
|
||||
/// it holds locks for all regions during the whole procedure.
|
||||
pub struct BatchGcProcedure {
|
||||
mailbox: MailboxRef,
|
||||
data: GcRegionData,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
data: BatchGcData,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct GcRegionData {
|
||||
pub struct BatchGcData {
|
||||
state: State,
|
||||
/// Meta server address
|
||||
server_addr: String,
|
||||
peer: Peer,
|
||||
gc_regions: GcRegions,
|
||||
description: String,
|
||||
/// The regions to be GC-ed
|
||||
regions: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
region_routes: Region2Peers,
|
||||
/// Related regions (e.g., for shared files after repartition).
|
||||
/// The source regions (where those files originally came from) are used as the key, and the destination regions (where files are currently stored) are used as the value.
|
||||
related_regions: HashMap<RegionId, HashSet<RegionId>>,
|
||||
/// Acquired file references (Populated in Acquiring state)
|
||||
file_refs: FileRefsManifest,
|
||||
/// mailbox timeout duration
|
||||
timeout: Duration,
|
||||
gc_report: Option<GcReport>,
|
||||
}
|
||||
|
||||
impl GcRegionProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::GcRegionProcedure";
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub enum State {
|
||||
/// Initial state
|
||||
Start,
|
||||
/// Fetching file references from datanodes
|
||||
Acquiring,
|
||||
/// Sending GC instruction to the target datanode
|
||||
Gcing,
|
||||
/// Updating region repartition info in kvbackend after GC based on the GC result
|
||||
UpdateRepartition,
|
||||
}
|
||||
|
||||
impl BatchGcProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::BatchGcProcedure";
|
||||
|
||||
pub fn new(
|
||||
mailbox: MailboxRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
server_addr: String,
|
||||
peer: Peer,
|
||||
gc_regions: GcRegions,
|
||||
description: String,
|
||||
regions: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
timeout: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
mailbox,
|
||||
data: GcRegionData {
|
||||
peer,
|
||||
table_metadata_manager,
|
||||
data: BatchGcData {
|
||||
state: State::Start,
|
||||
server_addr,
|
||||
gc_regions,
|
||||
description,
|
||||
regions,
|
||||
full_file_listing,
|
||||
timeout,
|
||||
region_routes: HashMap::new(),
|
||||
related_regions: HashMap::new(),
|
||||
file_refs: FileRefsManifest::default(),
|
||||
gc_report: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_gc_instr(&self) -> Result<GcReport> {
|
||||
send_gc_regions(
|
||||
&self.mailbox,
|
||||
&self.data.peer,
|
||||
self.data.gc_regions.clone(),
|
||||
&self.data.server_addr,
|
||||
self.data.timeout,
|
||||
&self.data.description,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub fn cast_result(res: Arc<dyn Any>) -> Result<GcReport> {
|
||||
res.downcast_ref::<GcReport>().cloned().ok_or_else(|| {
|
||||
error::UnexpectedSnafu {
|
||||
@@ -207,111 +228,129 @@ impl GcRegionProcedure {
|
||||
.build()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Procedure for GcRegionProcedure {
|
||||
fn type_name(&self) -> &str {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
// Send GC instruction to the datanode. This procedure only handle lock&send, results or other kind of
|
||||
// errors will be reported back via the oneshot channel.
|
||||
let reply = self
|
||||
.send_gc_instr()
|
||||
async fn get_table_route(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<(TableId, PhysicalTableRouteValue)> {
|
||||
self.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_route(table_id)
|
||||
.await
|
||||
.map_err(ProcedureError::external)?;
|
||||
|
||||
Ok(Status::done_with_output(reply))
|
||||
.context(TableMetadataManagerSnafu)
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
serde_json::to_string(&self.data).context(ToJsonSnafu)
|
||||
}
|
||||
|
||||
/// Read lock all regions involved in this GC procedure.
|
||||
/// So i.e. region migration won't happen during GC and cause race conditions.
|
||||
///
|
||||
/// Only read-lock the regions, not the catalog/schema, because it can run concurrently with other procedures (e.g. drop database/table).
|
||||
/// TODO:(discord9): integration test to verify this
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let lock_key: Vec<_> = self
|
||||
.data
|
||||
.gc_regions
|
||||
.regions
|
||||
.iter()
|
||||
.sorted() // sort to have a deterministic lock order
|
||||
.map(|id| RegionLock::Read(*id).into())
|
||||
.collect();
|
||||
|
||||
LockKey::new(lock_key)
|
||||
}
|
||||
}
|
||||
|
||||
/// Procedure to perform get file refs then batch GC for multiple regions, should only be used by admin function
|
||||
/// for triggering manual gc, as it holds locks for too long and for all regions during the procedure.
|
||||
pub struct BatchGcProcedure {
|
||||
mailbox: MailboxRef,
|
||||
data: BatchGcData,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct BatchGcData {
|
||||
state: State,
|
||||
server_addr: String,
|
||||
/// The regions to be GC-ed
|
||||
regions: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
region_routes: Region2Peers,
|
||||
/// Related regions (e.g., for shared files). Map: RegionId -> List of related RegionIds.
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
/// Acquired file references (Populated in Acquiring state)
|
||||
file_refs: FileRefsManifest,
|
||||
/// mailbox timeout duration
|
||||
timeout: Duration,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub enum State {
|
||||
/// Initial state
|
||||
Start,
|
||||
/// Fetching file references from datanodes
|
||||
Acquiring,
|
||||
/// Sending GC instruction to the target datanode
|
||||
Gcing,
|
||||
}
|
||||
|
||||
impl BatchGcProcedure {
|
||||
pub const TYPE_NAME: &'static str = "metasrv-procedure::BatchGcProcedure";
|
||||
|
||||
pub fn new(
|
||||
mailbox: MailboxRef,
|
||||
server_addr: String,
|
||||
regions: Vec<RegionId>,
|
||||
full_file_listing: bool,
|
||||
region_routes: Region2Peers,
|
||||
related_regions: HashMap<RegionId, Vec<RegionId>>,
|
||||
timeout: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
mailbox,
|
||||
data: BatchGcData {
|
||||
state: State::Start,
|
||||
server_addr,
|
||||
regions,
|
||||
full_file_listing,
|
||||
region_routes,
|
||||
related_regions,
|
||||
file_refs: FileRefsManifest::default(),
|
||||
timeout,
|
||||
},
|
||||
/// Return related regions for the given regions.
|
||||
/// The returned map uses the source regions (where those files originally came from) as the key,
|
||||
/// and the destination regions (where files are currently stored) as the value.
|
||||
/// If a region is not found in the repartition manager, the returned map still has this region as a key,
|
||||
/// just empty value
|
||||
async fn find_related_regions(
|
||||
&self,
|
||||
regions: &[RegionId],
|
||||
) -> Result<HashMap<RegionId, HashSet<RegionId>>> {
|
||||
let repart_mgr = self.table_metadata_manager.table_repart_manager();
|
||||
let mut related_regions: HashMap<RegionId, HashSet<RegionId>> = HashMap::new();
|
||||
for src_region in regions {
|
||||
// TODO(discord9): batch get
|
||||
if let Some(dst_regions) = repart_mgr
|
||||
.get_dst_regions(*src_region)
|
||||
.await
|
||||
.context(KvBackendSnafu)?
|
||||
{
|
||||
related_regions.insert(*src_region, dst_regions.into_iter().collect());
|
||||
} else {
|
||||
related_regions.insert(*src_region, Default::default());
|
||||
}
|
||||
}
|
||||
Ok(related_regions)
|
||||
}
|
||||
|
||||
/// Clean up region repartition info in kvbackend after GC
|
||||
/// according to cross reference in `FileRefsManifest`.
|
||||
async fn cleanup_region_repartition(&self) -> Result<()> {
|
||||
for (src_region, dst_regions) in self.data.file_refs.cross_region_refs.iter() {
|
||||
// TODO(discord9): batch update
|
||||
self.table_metadata_manager
|
||||
.table_repart_manager()
|
||||
.update_mappings(*src_region, &dst_regions.iter().cloned().collect_vec())
|
||||
.await
|
||||
.context(KvBackendSnafu)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Discover region routes for the given regions.
|
||||
async fn discover_route_for_regions(
|
||||
&self,
|
||||
regions: &[RegionId],
|
||||
) -> Result<(Region2Peers, Peer2Regions)> {
|
||||
let mut region_to_peer = HashMap::new();
|
||||
let mut peer_to_regions = HashMap::new();
|
||||
|
||||
// Group regions by table ID for batch processing
|
||||
let mut table_to_regions: HashMap<TableId, Vec<RegionId>> = HashMap::new();
|
||||
for region_id in regions {
|
||||
let table_id = region_id.table_id();
|
||||
table_to_regions
|
||||
.entry(table_id)
|
||||
.or_default()
|
||||
.push(*region_id);
|
||||
}
|
||||
|
||||
// Process each table's regions together for efficiency
|
||||
for (table_id, table_regions) in table_to_regions {
|
||||
match self.get_table_route(table_id).await {
|
||||
Ok((_phy_table_id, table_route)) => {
|
||||
table_route_to_region(
|
||||
&table_route,
|
||||
&table_regions,
|
||||
&mut region_to_peer,
|
||||
&mut peer_to_regions,
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
// Continue with other tables instead of failing completely
|
||||
// TODO(discord9): consider failing here instead
|
||||
warn!(
|
||||
"Failed to get table route for table {}: {}, skipping its regions",
|
||||
table_id, e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((region_to_peer, peer_to_regions))
|
||||
}
|
||||
|
||||
/// Set region routes and related regions for GC procedure
|
||||
async fn set_routes_and_related_regions(&mut self) -> Result<()> {
|
||||
let related_regions = self.find_related_regions(&self.data.regions).await?;
|
||||
|
||||
self.data.related_regions = related_regions.clone();
|
||||
|
||||
// Discover routes for all regions involved in GC, including both the
|
||||
// primary GC regions and their related regions.
|
||||
let mut regions_set: HashSet<RegionId> = self.data.regions.iter().cloned().collect();
|
||||
|
||||
regions_set.extend(related_regions.keys().cloned());
|
||||
regions_set.extend(related_regions.values().flat_map(|v| v.iter()).cloned());
|
||||
|
||||
let regions_to_discover = regions_set.into_iter().collect_vec();
|
||||
|
||||
let (region_to_peer, _) = self
|
||||
.discover_route_for_regions(&regions_to_discover)
|
||||
.await?;
|
||||
|
||||
self.data.region_routes = region_to_peer;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get file references from all datanodes that host the regions
|
||||
async fn get_file_references(&self) -> Result<FileRefsManifest> {
|
||||
use std::collections::{HashMap, HashSet};
|
||||
async fn get_file_references(&mut self) -> Result<FileRefsManifest> {
|
||||
self.set_routes_and_related_regions().await?;
|
||||
|
||||
let query_regions = &self.data.regions;
|
||||
let related_regions = &self.data.related_regions;
|
||||
@@ -344,20 +383,25 @@ impl BatchGcProcedure {
|
||||
}
|
||||
}
|
||||
|
||||
let mut datanode2related_regions: HashMap<Peer, HashMap<RegionId, Vec<RegionId>>> =
|
||||
let mut datanode2related_regions: HashMap<Peer, HashMap<RegionId, HashSet<RegionId>>> =
|
||||
HashMap::new();
|
||||
for (related_region, queries) in related_regions {
|
||||
if let Some((leader, _followers)) = region_routes.get(related_region) {
|
||||
datanode2related_regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.insert(*related_region, queries.clone());
|
||||
} // since read from manifest, no need to send to followers
|
||||
for (src_region, dst_regions) in related_regions {
|
||||
for dst_region in dst_regions {
|
||||
if let Some((leader, _followers)) = region_routes.get(dst_region) {
|
||||
datanode2related_regions
|
||||
.entry(leader.clone())
|
||||
.or_default()
|
||||
.entry(*src_region)
|
||||
.or_default()
|
||||
.insert(*dst_region);
|
||||
} // since read from manifest, no need to send to followers
|
||||
}
|
||||
}
|
||||
|
||||
// Send GetFileRefs instructions to each datanode
|
||||
let mut all_file_refs: HashMap<RegionId, HashSet<_>> = HashMap::new();
|
||||
let mut all_manifest_versions = HashMap::new();
|
||||
let mut all_cross_region_refs = HashMap::new();
|
||||
|
||||
for (peer, regions) in datanode2query_regions {
|
||||
let related_regions_for_peer =
|
||||
@@ -400,17 +444,25 @@ impl BatchGcProcedure {
|
||||
let entry = all_manifest_versions.entry(region_id).or_insert(version);
|
||||
*entry = (*entry).min(version);
|
||||
}
|
||||
|
||||
for (region_id, related_region_ids) in reply.file_refs_manifest.cross_region_refs {
|
||||
let entry = all_cross_region_refs
|
||||
.entry(region_id)
|
||||
.or_insert_with(HashSet::new);
|
||||
entry.extend(related_region_ids);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FileRefsManifest {
|
||||
file_refs: all_file_refs,
|
||||
manifest_version: all_manifest_versions,
|
||||
cross_region_refs: all_cross_region_refs,
|
||||
})
|
||||
}
|
||||
|
||||
/// Send GC instruction to all datanodes that host the regions,
|
||||
/// returns regions that need retry.
|
||||
async fn send_gc_instructions(&self) -> Result<Vec<RegionId>> {
|
||||
async fn send_gc_instructions(&self) -> Result<GcReport> {
|
||||
let regions = &self.data.regions;
|
||||
let region_routes = &self.data.region_routes;
|
||||
let file_refs = &self.data.file_refs;
|
||||
@@ -418,6 +470,7 @@ impl BatchGcProcedure {
|
||||
|
||||
// Group regions by datanode
|
||||
let mut datanode2regions: HashMap<Peer, Vec<RegionId>> = HashMap::new();
|
||||
let mut all_report = GcReport::default();
|
||||
|
||||
for region_id in regions {
|
||||
if let Some((leader, _followers)) = region_routes.get(region_id) {
|
||||
@@ -469,10 +522,15 @@ impl BatchGcProcedure {
|
||||
peer, success, need_retry
|
||||
);
|
||||
}
|
||||
all_need_retry.extend(report.need_retry_regions);
|
||||
all_need_retry.extend(report.need_retry_regions.clone());
|
||||
all_report.merge(report);
|
||||
}
|
||||
|
||||
Ok(all_need_retry.into_iter().collect())
|
||||
if !all_need_retry.is_empty() {
|
||||
warn!("Regions need retry after batch GC: {:?}", all_need_retry);
|
||||
}
|
||||
|
||||
Ok(all_report)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,12 +565,10 @@ impl Procedure for BatchGcProcedure {
|
||||
// Send GC instructions to all datanodes
|
||||
// TODO(discord9): handle need-retry regions
|
||||
match self.send_gc_instructions().await {
|
||||
Ok(_) => {
|
||||
info!(
|
||||
"Batch GC completed successfully for regions {:?}",
|
||||
self.data.regions
|
||||
);
|
||||
Ok(Status::done())
|
||||
Ok(report) => {
|
||||
self.data.state = State::UpdateRepartition;
|
||||
self.data.gc_report = Some(report);
|
||||
Ok(Status::executing(false))
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to send GC instructions: {}", e);
|
||||
@@ -520,6 +576,29 @@ impl Procedure for BatchGcProcedure {
|
||||
}
|
||||
}
|
||||
}
|
||||
State::UpdateRepartition => match self.cleanup_region_repartition().await {
|
||||
Ok(()) => {
|
||||
info!(
|
||||
"Cleanup region repartition info completed successfully for regions {:?}",
|
||||
self.data.regions
|
||||
);
|
||||
info!(
|
||||
"Batch GC completed successfully for regions {:?}",
|
||||
self.data.regions
|
||||
);
|
||||
let Some(report) = self.data.gc_report.take() else {
|
||||
return common_procedure::error::UnexpectedSnafu {
|
||||
err_msg: "GC report should be present after GC completion".to_string(),
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
Ok(Status::done_with_output(report))
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to cleanup region repartition info: {}", e);
|
||||
Err(ProcedureError::external(e))
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
@@ -38,29 +38,6 @@ pub struct GcJobReport {
|
||||
pub per_datanode_reports: HashMap<DatanodeId, GcReport>,
|
||||
pub failed_datanodes: HashMap<DatanodeId, Vec<Error>>,
|
||||
}
|
||||
impl GcJobReport {
|
||||
pub fn merge(&mut self, mut other: GcJobReport) {
|
||||
// merge per_datanode_reports&failed_datanodes
|
||||
for (dn_id, report) in other.per_datanode_reports {
|
||||
let self_report = self.per_datanode_reports.entry(dn_id).or_default();
|
||||
self_report.merge(report);
|
||||
}
|
||||
let all_failed_dn_ids = self
|
||||
.failed_datanodes
|
||||
.keys()
|
||||
.cloned()
|
||||
.chain(other.failed_datanodes.keys().cloned())
|
||||
.collect::<HashSet<_>>();
|
||||
for dn_id in all_failed_dn_ids {
|
||||
let entry = self.failed_datanodes.entry(dn_id).or_default();
|
||||
if let Some(other_errors) = other.failed_datanodes.remove(&dn_id) {
|
||||
entry.extend(other_errors);
|
||||
}
|
||||
}
|
||||
self.failed_datanodes
|
||||
.retain(|dn_id, _| !self.per_datanode_reports.contains_key(dn_id));
|
||||
}
|
||||
}
|
||||
|
||||
/// [`Event`] represents various types of events that can be processed by the gc ticker.
|
||||
///
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user