Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-22 22:20:02 +00:00

Compare commits: c112cdf241 ... v0.16.0-ni (14 commits)
| SHA1 |
| --- |
| 572e29b158 |
| 31cb769507 |
| e19493db4a |
| 9817eb934d |
| 8639961cc9 |
| a9cd117706 |
| 9485dbed64 |
| 21b71d1e10 |
| cfaa9b4dda |
| 19ad9a7f85 |
| 9e2f793b04 |
| 52466fdd92 |
| 869f8bf68a |
| 9527e0df2f |
300 Cargo.lock (generated)
@@ -17,6 +17,12 @@ version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
|
||||
|
||||
[[package]]
|
||||
name = "adler32"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
||||
|
||||
[[package]]
|
||||
name = "aead"
|
||||
version = "0.5.2"
|
||||
@@ -1425,6 +1431,31 @@ dependencies = [
|
||||
"piper",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon"
|
||||
version = "3.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33d9ef19ae5263a138da9a86871eca537478ab0332a7770bac7e3f08b801f89f"
|
||||
dependencies = [
|
||||
"bon-macros",
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon-macros"
|
||||
version = "3.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "577ae008f2ca11ca7641bd44601002ee5ab49ef0af64846ce1ab6057218a5cc1"
|
||||
dependencies = [
|
||||
"darling 0.21.0",
|
||||
"ident_case",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "borsh"
|
||||
version = "1.5.1"
|
||||
@@ -1828,7 +1859,7 @@ checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"chrono-tz-build",
|
||||
"phf",
|
||||
"phf 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1838,7 +1869,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7"
|
||||
dependencies = [
|
||||
"parse-zoneinfo",
|
||||
"phf_codegen",
|
||||
"phf_codegen 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2100,6 +2131,7 @@ dependencies = [
|
||||
"common-error",
|
||||
"common-grpc",
|
||||
"common-macro",
|
||||
"common-mem-prof",
|
||||
"common-meta",
|
||||
"common-options",
|
||||
"common-procedure",
|
||||
@@ -2547,6 +2579,9 @@ dependencies = [
|
||||
"rand 0.9.0",
|
||||
"regex",
|
||||
"rskafka",
|
||||
"rustls",
|
||||
"rustls-native-certs 0.7.3",
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
@@ -2558,6 +2593,7 @@ dependencies = [
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tonic 0.12.3",
|
||||
"tracing",
|
||||
"typetag",
|
||||
@@ -3260,6 +3296,16 @@ dependencies = [
|
||||
"darling_macro 0.20.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a79c4acb1fd5fa3d9304be4c76e031c54d2e92d172a393e24b19a14fe8532fe9"
|
||||
dependencies = [
|
||||
"darling_core 0.21.0",
|
||||
"darling_macro 0.21.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.14.4"
|
||||
@@ -3288,6 +3334,20 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74875de90daf30eb59609910b84d4d368103aaec4c924824c6799b28f77d6a1d"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim 0.11.1",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.14.4"
|
||||
@@ -3310,6 +3370,23 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e79f8e61677d5df9167cd85265f8e5f64b215cdea3fb55eebc3e622e44c7a146"
|
||||
dependencies = [
|
||||
"darling_core 0.21.0",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dary_heap"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "6.1.0"
|
||||
@@ -3799,6 +3876,7 @@ dependencies = [
|
||||
"common-grpc",
|
||||
"common-macro",
|
||||
"common-meta",
|
||||
"common-options",
|
||||
"common-procedure",
|
||||
"common-query",
|
||||
"common-recordbatch",
|
||||
@@ -4235,9 +4313,9 @@ checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1"
|
||||
|
||||
[[package]]
|
||||
name = "downcast-rs"
|
||||
version = "1.2.1"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
|
||||
checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf"
|
||||
|
||||
[[package]]
|
||||
name = "duration-str"
|
||||
@@ -4680,6 +4758,7 @@ dependencies = [
|
||||
"common-grpc",
|
||||
"common-macro",
|
||||
"common-meta",
|
||||
"common-options",
|
||||
"common-query",
|
||||
"common-recordbatch",
|
||||
"common-runtime",
|
||||
@@ -5980,6 +6059,29 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed"
|
||||
|
||||
[[package]]
|
||||
name = "include-flate"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df49c16750695486c1f34de05da5b7438096156466e7f76c38fcdf285cf0113e"
|
||||
dependencies = [
|
||||
"include-flate-codegen",
|
||||
"lazy_static",
|
||||
"libflate",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "include-flate-codegen"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c5b246c6261be723b85c61ecf87804e8ea4a35cb68be0ff282ed84b95ffe7d7"
|
||||
dependencies = [
|
||||
"libflate",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "include_dir"
|
||||
version = "0.7.4"
|
||||
@@ -6168,9 +6270,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6300,17 +6399,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
|
||||
|
||||
[[package]]
|
||||
name = "jieba-rs"
|
||||
version = "0.7.0"
|
||||
name = "jieba-macros"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1e2b0210dc78b49337af9e49d7ae41a39dceac6e5985613f1cf7763e2f76a25"
|
||||
checksum = "6105f38f083bb1a79ad523bd32fa0d8ffcb6abd2fc4da9da203c32bca5b6ace3"
|
||||
dependencies = [
|
||||
"phf_codegen 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jieba-rs"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47982a320106da83b0c5d6aec0fb83e109f0132b69670b063adaa6fa5b4f3f4a"
|
||||
dependencies = [
|
||||
"cedarwood",
|
||||
"derive_builder 0.20.1",
|
||||
"fxhash",
|
||||
"lazy_static",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"include-flate",
|
||||
"jieba-macros",
|
||||
"phf 0.12.1",
|
||||
"regex",
|
||||
]
|
||||
|
||||
@@ -6751,6 +6858,30 @@ version = "0.2.171"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
|
||||
|
||||
[[package]]
|
||||
name = "libflate"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e"
|
||||
dependencies = [
|
||||
"adler32",
|
||||
"core2",
|
||||
"crc32fast",
|
||||
"dary_heap",
|
||||
"libflate_lz77",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libflate_lz77"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d"
|
||||
dependencies = [
|
||||
"core2",
|
||||
"hashbrown 0.14.5",
|
||||
"rle-decode-fast",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libfuzzer-sys"
|
||||
version = "0.4.7"
|
||||
@@ -6781,7 +6912,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7142,11 +7273,10 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "measure_time"
|
||||
version = "0.8.3"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc"
|
||||
checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e"
|
||||
dependencies = [
|
||||
"instant",
|
||||
"log",
|
||||
]
|
||||
|
||||
@@ -8703,9 +8833,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "ownedbytes"
|
||||
version = "0.7.0"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558"
|
||||
checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
@@ -9062,7 +9192,17 @@ version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"phf_shared 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
|
||||
dependencies = [
|
||||
"phf_shared 0.12.1",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9071,8 +9211,18 @@ version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
"phf_generator 0.11.2",
|
||||
"phf_shared 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "efbdcb6f01d193b17f0b9c3360fa7e0e620991b193ff08702f78b3ce365d7e61"
|
||||
dependencies = [
|
||||
"phf_generator 0.12.1",
|
||||
"phf_shared 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9081,10 +9231,20 @@ version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"phf_shared 0.11.2",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2cbb1126afed61dd6368748dae63b1ee7dc480191c6262a3b4ff1e29d86a6c5b"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"phf_shared 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.11.2"
|
||||
@@ -9094,6 +9254,15 @@ dependencies = [
|
||||
"siphasher 0.3.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
|
||||
dependencies = [
|
||||
"siphasher 1.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.5"
|
||||
@@ -9682,7 +9851,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"itertools 0.11.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
@@ -9728,7 +9897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.11.0",
|
||||
"itertools 0.14.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
@@ -10306,9 +10475,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.0"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@@ -10581,6 +10750,12 @@ dependencies = [
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rle-decode-fast"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
||||
|
||||
[[package]]
|
||||
name = "roaring"
|
||||
version = "0.10.9"
|
||||
@@ -11149,9 +11324,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.217"
|
||||
version = "1.0.219"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
||||
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
@@ -11168,9 +11343,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.217"
|
||||
version = "1.0.219"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
||||
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -11625,9 +11800,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
|
||||
|
||||
[[package]]
|
||||
name = "sketches-ddsketch"
|
||||
version = "0.2.2"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
|
||||
checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
@@ -12197,7 +12372,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"parking_lot 0.12.3",
|
||||
"phf_shared",
|
||||
"phf_shared 0.11.2",
|
||||
"precomputed-hash",
|
||||
]
|
||||
|
||||
@@ -12556,14 +12731,15 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.22.0"
|
||||
version = "0.24.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8d0582f186c0a6d55655d24543f15e43607299425c5ad8352c242b914b31856"
|
||||
checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"arc-swap",
|
||||
"base64 0.22.1",
|
||||
"bitpacking",
|
||||
"bon",
|
||||
"byteorder",
|
||||
"census",
|
||||
"crc32fast",
|
||||
@@ -12573,20 +12749,20 @@ dependencies = [
|
||||
"fnv",
|
||||
"fs4",
|
||||
"htmlescape",
|
||||
"itertools 0.12.1",
|
||||
"hyperloglogplus",
|
||||
"itertools 0.14.0",
|
||||
"levenshtein_automata",
|
||||
"log",
|
||||
"lru",
|
||||
"lz4_flex",
|
||||
"measure_time",
|
||||
"memmap2",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"oneshot",
|
||||
"rayon",
|
||||
"regex",
|
||||
"rust-stemmers",
|
||||
"rustc-hash 1.1.0",
|
||||
"rustc-hash 2.0.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sketches-ddsketch",
|
||||
@@ -12599,7 +12775,7 @@ dependencies = [
|
||||
"tantivy-stacker",
|
||||
"tantivy-tokenizer-api",
|
||||
"tempfile",
|
||||
"thiserror 1.0.64",
|
||||
"thiserror 2.0.12",
|
||||
"time",
|
||||
"uuid",
|
||||
"winapi",
|
||||
@@ -12608,22 +12784,22 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-bitpacker"
|
||||
version = "0.6.0"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df"
|
||||
checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494"
|
||||
dependencies = [
|
||||
"bitpacking",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-columnar"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e"
|
||||
checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344"
|
||||
dependencies = [
|
||||
"downcast-rs",
|
||||
"fastdivide",
|
||||
"itertools 0.12.1",
|
||||
"itertools 0.14.0",
|
||||
"serde",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
@@ -12633,9 +12809,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-common"
|
||||
version = "0.7.0"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4"
|
||||
checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -12657,9 +12833,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-jieba"
|
||||
version = "0.11.0"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f2fe65c125f0d76d06f0f2ce9fbb9287b53f0dafb51a6270d984a840e2f16c1"
|
||||
checksum = "3b08147cc130e323ecc522117927b198bec617fe1df562a0b6449905858d0363"
|
||||
dependencies = [
|
||||
"jieba-rs",
|
||||
"lazy_static",
|
||||
@@ -12668,19 +12844,23 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-query-grammar"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82"
|
||||
checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a"
|
||||
dependencies = [
|
||||
"nom",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-sstable"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e"
|
||||
checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"itertools 0.14.0",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-fst",
|
||||
@@ -12689,9 +12869,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-stacker"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8"
|
||||
checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1"
|
||||
dependencies = [
|
||||
"murmurhash32",
|
||||
"rand_distr",
|
||||
@@ -12700,9 +12880,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-tokenizer-api"
|
||||
version = "0.3.0"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04"
|
||||
checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
@@ -13165,7 +13345,7 @@ dependencies = [
|
||||
"log",
|
||||
"parking_lot 0.12.3",
|
||||
"percent-encoding",
|
||||
"phf",
|
||||
"phf 0.11.2",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol",
|
||||
"postgres-types",
|
||||
@@ -14373,7 +14553,7 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -207,6 +207,8 @@
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
| `memory` | -- | -- | The memory options. |
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |

## Distributed Mode

@@ -311,6 +313,8 @@
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
| `memory` | -- | -- | The memory options. |
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |

### Metasrv

@@ -333,6 +337,12 @@
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
| `backend_tls` | -- | -- | TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)<br/>When using PostgreSQL or MySQL as metadata store, you can configure TLS here |
| `backend_tls.mode` | String | `prefer` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- "disable" - No TLS<br/>- "prefer" (default) - Try TLS, fallback to plain<br/>- "require" - Require TLS<br/>- "verify_ca" - Require TLS and verify CA<br/>- "verify_full" - Require TLS and verify hostname |
| `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
| `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
| `backend_tls.ca_cert_path` | String | `""` | Path to CA certificate file (for server certificate verification)<br/>Required when using custom CAs or self-signed certificates<br/>Leave empty to use system root certificates only<br/>Like "/path/to/ca.crt" |
| `backend_tls.watch` | Bool | `false` | Watch for certificate file changes and auto reload |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
@@ -389,6 +399,8 @@
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
| `memory` | -- | -- | The memory options. |
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |

### Datanode

@@ -554,6 +566,8 @@
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
| `memory` | -- | -- | The memory options. |
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |

### Flownode

@@ -611,3 +625,5 @@
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
| `query` | -- | -- | -- |
| `query.parallelism` | Integer | `1` | Parallelism of the query engine for query sent by flownode.<br/>Default to 1, so it won't use too much cpu or memory |
| `memory` | -- | -- | The memory options. |
| `memory.enable_heap_profiling` | Bool | `true` | Whether to enable heap profiling activation during startup.<br/>When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable<br/>is set to "prof:true,prof_active:false". The official image adds this env variable.<br/>Default is true. |
@@ -669,3 +669,11 @@ headers = { }
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

## The memory options.
[memory]
## Whether to enable heap profiling activation during startup.
## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
## is set to "prof:true,prof_active:false". The official image adds this env variable.
## Default is true.
enable_heap_profiling = true

@@ -136,3 +136,11 @@ default_ratio = 1.0
## Parallelism of the query engine for query sent by flownode.
## Default to 1, so it won't use too much cpu or memory
parallelism = 1

## The memory options.
[memory]
## Whether to enable heap profiling activation during startup.
## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
## is set to "prof:true,prof_active:false". The official image adds this env variable.
## Default is true.
enable_heap_profiling = true

@@ -280,3 +280,11 @@ headers = { }
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

## The memory options.
[memory]
## Whether to enable heap profiling activation during startup.
## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
## is set to "prof:true,prof_active:false". The official image adds this env variable.
## Default is true.
enable_heap_profiling = true

@@ -65,6 +65,34 @@ node_max_idle_time = "24hours"
## The number of threads to execute the runtime for global write operations.
#+ compact_rt_size = 4

## TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)
## When using PostgreSQL or MySQL as metadata store, you can configure TLS here
[backend_tls]
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
## - "disable" - No TLS
## - "prefer" (default) - Try TLS, fallback to plain
## - "require" - Require TLS
## - "verify_ca" - Require TLS and verify CA
## - "verify_full" - Require TLS and verify hostname
mode = "prefer"

## Path to client certificate file (for client authentication)
## Like "/path/to/client.crt"
cert_path = ""

## Path to client private key file (for client authentication)
## Like "/path/to/client.key"
key_path = ""

## Path to CA certificate file (for server certificate verification)
## Required when using custom CAs or self-signed certificates
## Leave empty to use system root certificates only
## Like "/path/to/ca.crt"
ca_cert_path = ""

## Watch for certificate file changes and auto reload
watch = false

## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
@@ -265,3 +293,11 @@ headers = { }
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

## The memory options.
[memory]
## Whether to enable heap profiling activation during startup.
## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
## is set to "prof:true,prof_active:false". The official image adds this env variable.
## Default is true.
enable_heap_profiling = true

@@ -783,3 +783,11 @@ headers = { }
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

## The memory options.
[memory]
## Whether to enable heap profiling activation during startup.
## When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
## is set to "prof:true,prof_active:false". The official image adds this env variable.
## Default is true.
enable_heap_profiling = true
@@ -47,4 +47,6 @@ WORKDIR /greptime
COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH

ENV MALLOC_CONF="prof:true,prof_active:false"

ENTRYPOINT ["greptime"]

@@ -47,4 +47,6 @@ WORKDIR /greptime
COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH

ENV MALLOC_CONF="prof:true,prof_active:false"

ENTRYPOINT ["greptime"]

@@ -15,4 +15,6 @@ ADD $TARGETARCH/greptime /greptime/bin/

ENV PATH /greptime/bin/:$PATH

ENV MALLOC_CONF="prof:true,prof_active:false"

ENTRYPOINT ["greptime"]

@@ -18,4 +18,6 @@ ENV PATH /greptime/bin/:$PATH

ENV TARGET_BIN=$TARGET_BIN

ENV MALLOC_CONF="prof:true,prof_active:false"

ENTRYPOINT ["sh", "-c", "exec $TARGET_BIN \"$@\"", "--"]
@@ -30,6 +30,23 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph

## Profiling

### Configuration

You can control heap profiling activation through configuration. Add the following to your configuration file:

```toml
[memory]
# Whether to enable heap profiling activation during startup.
# When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
# is set to "prof:true,prof_active:false". The official image adds this env variable.
# Default is true.
enable_heap_profiling = true
```

By default, if you set `MALLOC_CONF=prof:true,prof_active:false`, the database will enable profiling during startup. You can disable this behavior by setting `enable_heap_profiling = false` in the configuration.

### Starting with environment variables

Start GreptimeDB instance with environment variables:

```bash
@@ -40,6 +57,23 @@ MALLOC_CONF=prof:true ./target/debug/greptime standalone start
_RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start
```

### Memory profiling control

You can control heap profiling activation using the new HTTP APIs:

```bash
# Check current profiling status
curl -X GET localhost:4000/debug/prof/mem/status

# Activate heap profiling (if not already active)
curl -X POST localhost:4000/debug/prof/mem/activate

# Deactivate heap profiling
curl -X POST localhost:4000/debug/prof/mem/deactivate
```

### Dump memory profiling data

Dump memory profiling data through HTTP API:

```bash
151 docs/rfcs/2025-07-04-compatibility-test-framework.md (normal file)
@@ -0,0 +1,151 @@
---
Feature Name: Compatibility Test Framework
Tracking Issue: TBD
Date: 2025-07-04
Author: "Ruihang Xia <waynestxia@gmail.com>"
---

# Summary

This RFC proposes a compatibility test framework for GreptimeDB to ensure backward and forward compatibility across different versions of GreptimeDB.

# Motivation

In current practice, we don't have a systematic way to test and ensure the compatibility of different versions of GreptimeDB. Each time we release a new version, we need to manually test the compatibility with ad-hoc cases. This is not only time-consuming but also error-prone and unmaintainable, and it relies heavily on the release manager to ensure compatibility between versions.

We also don't have a detailed guide in the release SOP on how to test and ensure the compatibility of a new version, and we have broken compatibility many times (`v0.14.1` and `v0.15.1` are two examples, both released right after a major release).

# Details

This RFC proposes a compatibility test framework that is easy to maintain, extend, and run. It can determine the compatibility between any two given versions of GreptimeDB, both backward and forward. It is based on the Sqlness library, but used in a different way.

Generally speaking, the framework is composed of two parts:

1. Test cases: A set of test cases maintained specifically for the compatibility test, still in the `.sql` and `.result` format.
2. Test framework: A new sqlness runner that runs the test cases, with some new features not required by the integration sqlness test.

## Test Cases

### Structure

The case set is organized in three parts:

- `1.feature`: Use a new feature
- `2.verify`: Verify database behavior
- `3.cleanup`: Paired with `1.feature`; clean up the test environment.

These three parts are organized in a tree structure, and should be run in sequence:

```
compatibility_test/
├── 1.feature/
│   ├── feature-a/
│   ├── feature-b/
│   └── feature-c/
├── 2.verify/
│   ├── verify-metadata/
│   ├── verify-data/
│   └── verify-schema/
└── 3.cleanup/
    ├── cleanup-a/
    ├── cleanup-b/
    └── cleanup-c/
```

### Example

For example, for a new feature like adding a new index option ([#6416](https://github.com/GreptimeTeam/greptimedb/pull/6416)), we (who implement the feature) create a new test case like this:

```sql
-- path: compatibility_test/1.feature/index-option/granularity_and_false_positive_rate.sql

-- SQLNESS ARG since=0.15.0
-- SQLNESS IGNORE_RESULT
CREATE TABLE granularity_and_false_positive_rate (ts timestamp time index, val double) with ("index.granularity" = "8192", "index.false_positive_rate" = "0.01");
```

And the paired cleanup case:

```sql
-- path: compatibility_test/3.cleanup/index-option/granularity_and_false_positive_rate.sql
drop table granularity_and_false_positive_rate;
```

Since this new feature doesn't require a special way to verify the database behavior, we can reuse existing test cases in `2.verify/`. For example, we can reuse the `verify-metadata` test case to verify the metadata of the table.

```sql
-- path: compatibility_test/2.verify/verify-metadata/show-create-table.sql

-- SQLNESS TEMPLATE TABLE="SHOW TABLES";
SHOW CREATE TABLE $TABLE;
```

In this example, we use some new sqlness features that will be introduced in the next section (`since`, `IGNORE_RESULT`, `TEMPLATE`).

### Maintenance

Each time we implement a new feature that should be covered by the compatibility test, we should create new test cases in `1.feature/` and `3.cleanup/` for it, and check whether existing cases in `2.verify/` can be reused to verify the database behavior.

This simulates an enthusiastic user who adopts every new feature right away. The new maintenance burden falls on the feature implementer, who writes one more test case to pin down the feature's behavior. Once a breaking change is introduced later, the compatibility test framework can detect it automatically.

Another topic is deprecation. If a feature is deprecated, we should also mark it in the test case. Continuing the example above, assume we deprecate the `index.granularity` and `index.false_positive_rate` index options in `v0.99.0`; we can mark them as:
```sql
-- SQLNESS ARG since=0.15.0 till=0.99.0
...
```

This tells the framework to ignore this feature in version `v0.99.0` and later. Since we currently have many experimental features that are scheduled to be broken in the future, this is a good way to mark them.
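To make this concrete, here is a sketch of the full deprecated case file, reusing the statement from the example above (the exact combination of interceptors shown is illustrative only):

```sql
-- path: compatibility_test/1.feature/index-option/granularity_and_false_positive_rate.sql

-- Available since 0.15.0; ignored by the runner for 0.99.0 and later.
-- SQLNESS ARG since=0.15.0 till=0.99.0
-- SQLNESS IGNORE_RESULT
CREATE TABLE granularity_and_false_positive_rate (ts timestamp time index, val double) with ("index.granularity" = "8192", "index.false_positive_rate" = "0.01");
```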

## Test Framework

This section describes the new sqlness features required by this framework.

### Since and Till

Following the `ARG` interceptor in sqlness, we can mark a feature as available between two given versions. Only `since` is required:

```sql
-- SQLNESS ARG since=VERSION_STRING [till=VERSION_STRING]
```

### IGNORE_RESULT

`IGNORE_RESULT` is a new interceptor. It tells the runner to ignore the result of the query and only check whether the query executes successfully.

This reduces the maintenance burden of the test cases: unlike the integration sqlness test, in most cases we don't care about the result of the query; we only need to make sure it executes successfully.
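For illustration only (the file path and inserted values are hypothetical; the interceptor syntax mirrors the earlier examples), a feature case might ignore output that is not stable across versions:

```sql
-- path: compatibility_test/1.feature/index-option/insert_sample_row.sql (hypothetical)

-- SQLNESS ARG since=0.15.0
-- SQLNESS IGNORE_RESULT
-- The affected-row output may be rendered differently across versions,
-- so only successful execution is asserted here.
INSERT INTO granularity_and_false_positive_rate (ts, val) VALUES ('2024-01-01 00:00:00', 1.0);
```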

### TEMPLATE

`TEMPLATE` is another new interceptor. It generates queries from a template based on runtime data.

In the example above, we need to run the `SHOW CREATE TABLE` query for all existing tables, so we can use the `TEMPLATE` interceptor to generate the query from a dynamic table list.
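As a rough illustration (the second table name is hypothetical), the runner might evaluate the template query and expand the templated statement once per returned table:

```sql
-- Template in the case file:
-- SQLNESS TEMPLATE TABLE="SHOW TABLES";
SHOW CREATE TABLE $TABLE;

-- Possible expansion at run time, one statement per table returned by SHOW TABLES:
SHOW CREATE TABLE granularity_and_false_positive_rate;
SHOW CREATE TABLE some_other_table;
```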

### RUNNER

There are also some extra requirements for the runner itself:

- It should run the test cases in sequence: first `1.feature/`, then `2.verify/`, and finally `3.cleanup/`.
- It should be able to fetch the required versions automatically to finish the test.
- It should handle `since` and `till` properly.

In the `1.feature` phase, the runner needs to identify, by version number, all the features that need to be tested. It then restarts with a new version (the `to` version) to run the `2.verify/` and `3.cleanup/` phases.

## Test Report

Finally, we can run the compatibility test to verify the compatibility between any two given versions of GreptimeDB, for example:

```bash
# check backward compatibility between v0.15.0 and v0.16.0 when releasing v0.16.0
./sqlness run --from=0.15.0 --to=0.16.0

# check forward compatibility when downgrading from v0.15.0 to v0.13.0
./sqlness run --from=0.15.0 --to=0.13.0
```

We can also use a script to run the compatibility test over all versions in a given range, giving a quick report covering all the versions we need.

Since we always bump the version in `Cargo.toml` to the next major release version, that version can be used as the "latest" unpublished version for scenarios like local testing.

# Alternatives

There was a previous attempt to implement a compatibility test framework, which was later disabled ([#3728](https://github.com/GreptimeTeam/greptimedb/issues/3728)).
File diff suppressed because it is too large
@@ -21,14 +21,14 @@
|
||||
# Resources
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
|
||||
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
|
||||
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
|
||||
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-datanode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{instance}}]-[{{ pod }}]` |
|
||||
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$datanode"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-frontend"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$frontend"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
|
||||
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
|
||||
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$metasrv"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-flownode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{instance=~"$flownode"}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
# Frontend Requests
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
@@ -72,6 +72,7 @@
|
||||
| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
|
||||
| Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Compaction input/output bytes | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
|
||||
| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
|
||||
| Cache Miss | `sum by (instance,pod, type) (rate(greptime_mito_cache_miss{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | The local cache miss of the datanode. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
# OpenDAL
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
|
||||
@@ -180,13 +180,18 @@ groups:
|
||||
- title: Datanode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$datanode"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{ pod }}]'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-datanode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Datanode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -197,16 +202,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Frontend Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$frontend"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-frontend"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Frontend CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -217,16 +232,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Metasrv Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$metasrv"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Metasrv CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -237,16 +262,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Flownode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{instance=~"$flownode"}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-flownode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Flownode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -257,6 +292,11 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Frontend Requests
|
||||
panels:
|
||||
- title: HTTP QPS per Instance
|
||||
@@ -642,6 +682,15 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
|
||||
- title: Cache Miss
|
||||
type: timeseries
|
||||
description: The local cache miss of the datanode.
|
||||
queries:
|
||||
- expr: sum by (instance,pod, type) (rate(greptime_mito_cache_miss{instance=~"$datanode"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: OpenDAL
|
||||
panels:
|
||||
- title: QPS per Instance
|
||||
|
||||
File diff suppressed because it is too large
@@ -21,14 +21,14 @@
|
||||
# Resources
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{instance}}]-[{{ pod }}]` |
|
||||
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
|
||||
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
|
||||
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)` | `timeseries` | Current memory usage by instance | `prometheus` | `decbytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Datanode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-datanode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{instance}}]-[{{ pod }}]` |
|
||||
| Datanode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-frontend"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Frontend CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]-cpu` |
|
||||
| Metasrv Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]-resident` |
|
||||
| Metasrv CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode Memory per Instance | `sum(process_resident_memory_bytes{}) by (instance, pod)`<br/>`max(greptime_memory_limit_in_bytes{app="greptime-flownode"})` | `timeseries` | Current memory usage by instance | `prometheus` | `bytes` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
| Flownode CPU Usage per Instance | `sum(rate(process_cpu_seconds_total{}[$__rate_interval]) * 1000) by (instance, pod)`<br/>`max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})` | `timeseries` | Current cpu usage by instance | `prometheus` | `none` | `[{{ instance }}]-[{{ pod }}]` |
|
||||
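The updated rows overlay the new `greptime_memory_limit_in_bytes` and `greptime_cpu_limit_in_millicores` series on the usage panels. How those gauges are populated is not part of this compare view; purely as a hypothetical illustration, comparable gauges could be registered with the `prometheus` crate as follows (everything except the two metric names is made up):

```rust
use std::sync::LazyLock;

use prometheus::{register_int_gauge_vec, IntGaugeVec};

// Hypothetical registration sketch; only the two metric names come from the dashboards above.
static MEMORY_LIMIT: LazyLock<IntGaugeVec> = LazyLock::new(|| {
    register_int_gauge_vec!(
        "greptime_memory_limit_in_bytes",
        "Configured memory limit of the process",
        &["app"]
    )
    .unwrap()
});

static CPU_LIMIT: LazyLock<IntGaugeVec> = LazyLock::new(|| {
    register_int_gauge_vec!(
        "greptime_cpu_limit_in_millicores",
        "Configured CPU limit of the process in millicores",
        &["app"]
    )
    .unwrap()
});

/// Record the limits once at startup, e.g. from cgroup or Kubernetes settings.
fn record_resource_limits(app: &str, memory_bytes: i64, cpu_millicores: i64) {
    MEMORY_LIMIT.with_label_values(&[app]).set(memory_bytes);
    CPU_LIMIT.with_label_values(&[app]).set(cpu_millicores);
}
```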
# Frontend Requests
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
@@ -72,6 +72,7 @@
|
||||
| Region Worker Handle Bulk Insert Requests | `histogram_quantile(0.95, sum by(le,instance, stage, pod) (rate(greptime_region_worker_handle_write_bucket[$__rate_interval])))`<br/>`sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_sum[$__rate_interval]))/sum by(instance, stage, pod) (rate(greptime_region_worker_handle_write_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to handle bulk insert region requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
|
||||
| Active Series and Field Builders Count | `sum by(instance, pod) (greptime_mito_memtable_active_series_count)`<br/>`sum by(instance, pod) (greptime_mito_memtable_field_builder_count)` | `timeseries` | Active series and field builder counts in the memtable | `prometheus` | `none` | `[{{instance}}]-[{{pod}}]-series` |
|
||||
| Region Worker Convert Requests | `histogram_quantile(0.95, sum by(le, instance, stage, pod) (rate(greptime_datanode_convert_region_request_bucket[$__rate_interval])))`<br/>`sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_sum[$__rate_interval]))/sum by(le,instance, stage, pod) (rate(greptime_datanode_convert_region_request_count[$__rate_interval]))` | `timeseries` | Per-stage elapsed time for region worker to decode requests. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{stage}}]-P95` |
|
||||
| Cache Miss | `sum by (instance,pod, type) (rate(greptime_mito_cache_miss{}[$__rate_interval]))` | `timeseries` | The local cache misses of the datanode. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{type}}]` |
|
||||
# OpenDAL
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
|
||||
@@ -180,13 +180,18 @@ groups:
|
||||
- title: Datanode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{ pod }}]'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-datanode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Datanode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -197,16 +202,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-datanode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Frontend Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-frontend"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Frontend CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -217,16 +232,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-cpu'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-frontend"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Metasrv Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]-resident'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-metasrv"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Metasrv CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -237,16 +262,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-metasrv"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Flownode Memory per Instance
|
||||
type: timeseries
|
||||
description: Current memory usage by instance
|
||||
unit: decbytes
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: sum(process_resident_memory_bytes{}) by (instance, pod)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_memory_limit_in_bytes{app="greptime-flownode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Flownode CPU Usage per Instance
|
||||
type: timeseries
|
||||
description: Current cpu usage by instance
|
||||
@@ -257,6 +292,11 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{ instance }}]-[{{ pod }}]'
|
||||
- expr: max(greptime_cpu_limit_in_millicores{app="greptime-flownode"})
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: limit
|
||||
- title: Frontend Requests
|
||||
panels:
|
||||
- title: HTTP QPS per Instance
|
||||
@@ -642,6 +682,15 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{stage}}]-AVG'
|
||||
- title: Cache Miss
|
||||
type: timeseries
|
||||
description: The local cache misses of the datanode.
|
||||
queries:
|
||||
- expr: sum by (instance,pod, type) (rate(greptime_mito_cache_miss{}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{type}}]'
|
||||
- title: OpenDAL
|
||||
panels:
|
||||
- title: QPS per Instance
|
||||
|
||||
@@ -26,7 +26,7 @@ check_dashboards_generation() {
|
||||
./grafana/scripts/gen-dashboards.sh
|
||||
|
||||
if [[ -n "$(git diff --name-only grafana/dashboards/metrics)" ]]; then
|
||||
echo "Error: The dashboards are not generated correctly. You should execute the `make dashboards` command."
|
||||
echo "Error: The dashboards are not generated correctly. You should execute the 'make dashboards' command."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ use api::v1::meta::ProcedureStatus;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cluster::{ClusterInfo, NodeInfo};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::key::flow::flow_state::FlowStat;
|
||||
use common_meta::procedure_executor::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::rpc::procedure;
|
||||
use common_procedure::{ProcedureInfo, ProcedureState};
|
||||
use meta_client::MetaClientRef;
|
||||
|
||||
@@ -75,7 +75,7 @@ impl StoreConfig {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
BackendImpl::PostgresStore => {
|
||||
let table_name = &self.meta_table_name;
|
||||
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs)
|
||||
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, None)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
Ok(common_meta::kv_backend::rds::PgStore::with_pg_pool(
|
||||
|
||||
@@ -38,6 +38,7 @@ common-config.workspace = true
|
||||
common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-mem-prof.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-options.workspace = true
|
||||
common-procedure.workspace = true
|
||||
|
||||
@@ -28,7 +28,7 @@ use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::datanode::{DatanodeOptions, Instance, APP_NAME};
|
||||
use crate::error::{MetaClientInitSnafu, MissingConfigSnafu, Result, StartDatanodeSnafu};
|
||||
use crate::{create_resource_limit_metrics, log_versions};
|
||||
use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile};
|
||||
|
||||
/// Builder for Datanode instance.
|
||||
pub struct InstanceBuilder {
|
||||
@@ -68,6 +68,7 @@ impl InstanceBuilder {
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&dn_opts.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
|
||||
plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
|
||||
|
||||
@@ -46,7 +46,7 @@ use crate::error::{
|
||||
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{create_resource_limit_metrics, log_versions, App};
|
||||
use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App};
|
||||
|
||||
pub const APP_NAME: &str = "greptime-flownode";
|
||||
|
||||
@@ -280,6 +280,7 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&opts.component.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
|
||||
info!("Flownode start command: {:#?}", self);
|
||||
|
||||
@@ -47,7 +47,7 @@ use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{create_resource_limit_metrics, log_versions, App};
|
||||
use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App};
|
||||
|
||||
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
|
||||
|
||||
@@ -283,6 +283,7 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&opts.component.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
|
||||
info!("Frontend start command: {:#?}", self);
|
||||
|
||||
@@ -15,7 +15,10 @@
|
||||
#![feature(assert_matches, let_chains)]
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_telemetry::{error, info};
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_mem_prof::activate_heap_profile;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use stat::{get_cpu_limit, get_memory_limit};
|
||||
|
||||
use crate::error::Result;
|
||||
@@ -145,3 +148,20 @@ fn log_env_flags() {
|
||||
info!("argument: {}", argument);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn maybe_activate_heap_profile(memory_options: &common_options::memory::MemoryOptions) {
|
||||
if memory_options.enable_heap_profiling {
|
||||
match activate_heap_profile() {
|
||||
Ok(()) => {
|
||||
info!("Heap profile is active");
|
||||
}
|
||||
Err(err) => {
|
||||
if err.status_code() == StatusCode::Unsupported {
|
||||
info!("Heap profile is not supported");
|
||||
} else {
|
||||
warn!(err; "Failed to activate heap profile");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
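For completeness, a minimal sketch of driving the new hook outside of the start commands; it assumes `MemoryOptions` derives `Default` and keeps the `enable_heap_profiling` flag used above:

```rust
use common_options::memory::MemoryOptions;

// Minimal sketch, assuming this lives next to `maybe_activate_heap_profile` above and
// that all other MemoryOptions fields keep their defaults.
fn enable_profiling_at_startup() {
    let memory = MemoryOptions {
        enable_heap_profiling: true,
        ..Default::default()
    };
    maybe_activate_heap_profile(&memory);
}
```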
@@ -30,7 +30,7 @@ use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{self, LoadLayeredConfigSnafu, Result, StartMetaServerSnafu};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{create_resource_limit_metrics, log_versions, App};
|
||||
use crate::{create_resource_limit_metrics, log_versions, maybe_activate_heap_profile, App};
|
||||
|
||||
type MetasrvOptions = GreptimeOptions<meta_srv::metasrv::MetasrvOptions>;
|
||||
|
||||
@@ -325,6 +325,7 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&opts.component.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
|
||||
info!("Metasrv start command: {:#?}", self);
|
||||
|
||||
@@ -34,17 +34,19 @@ use common_meta::cluster::{NodeInfo, NodeStatus};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::flow_meta::FlowMetadataAllocator;
|
||||
use common_meta::ddl::table_meta::TableMetadataAllocator;
|
||||
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
|
||||
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl};
|
||||
use common_meta::ddl_manager::DdlManager;
|
||||
use common_meta::key::flow::flow_state::FlowStat;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::procedure_executor::LocalProcedureExecutor;
|
||||
use common_meta::region_keeper::MemoryRegionKeeper;
|
||||
use common_meta::region_registry::LeaderRegionRegistry;
|
||||
use common_meta::sequence::SequenceBuilder;
|
||||
use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef};
|
||||
use common_options::memory::MemoryOptions;
|
||||
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::{
|
||||
@@ -83,7 +85,7 @@ use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::error::{Result, StartFlownodeSnafu};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{create_resource_limit_metrics, error, log_versions, App};
|
||||
use crate::{create_resource_limit_metrics, error, log_versions, maybe_activate_heap_profile, App};
|
||||
|
||||
pub const APP_NAME: &str = "greptime-standalone";
|
||||
|
||||
@@ -157,6 +159,7 @@ pub struct StandaloneOptions {
|
||||
pub max_in_flight_write_bytes: Option<ReadableSize>,
|
||||
pub slow_query: Option<SlowQueryOptions>,
|
||||
pub query: QueryOptions,
|
||||
pub memory: MemoryOptions,
|
||||
}
|
||||
|
||||
impl Default for StandaloneOptions {
|
||||
@@ -190,6 +193,7 @@ impl Default for StandaloneOptions {
|
||||
max_in_flight_write_bytes: None,
|
||||
slow_query: Some(SlowQueryOptions::default()),
|
||||
query: QueryOptions::default(),
|
||||
memory: MemoryOptions::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -486,6 +490,7 @@ impl StartCommand {
|
||||
);
|
||||
|
||||
log_versions(verbose_version(), short_version(), APP_NAME);
|
||||
maybe_activate_heap_profile(&opts.component.memory);
|
||||
create_resource_limit_metrics(APP_NAME);
|
||||
|
||||
info!("Standalone start command: {:#?}", self);
|
||||
@@ -636,9 +641,8 @@ impl StartCommand {
|
||||
flow_metadata_allocator: flow_metadata_allocator.clone(),
|
||||
region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
|
||||
};
|
||||
let procedure_manager_c = procedure_manager.clone();
|
||||
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager.clone(), true)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
#[cfg(feature = "enterprise")]
|
||||
let ddl_manager = {
|
||||
@@ -646,7 +650,11 @@ impl StartCommand {
|
||||
plugins.get();
|
||||
ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
|
||||
};
|
||||
let ddl_task_executor: ProcedureExecutorRef = Arc::new(ddl_manager);
|
||||
|
||||
let procedure_executor = Arc::new(LocalProcedureExecutor::new(
|
||||
Arc::new(ddl_manager),
|
||||
procedure_manager.clone(),
|
||||
));
|
||||
|
||||
let fe_instance = FrontendBuilder::new(
|
||||
fe_opts.clone(),
|
||||
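The hunk above swaps the `ProcedureExecutorRef` built directly from `DdlManager` for a `LocalProcedureExecutor` wrapper. Distilled into a helper (the function name is illustrative), the wiring is:

```rust
use std::sync::Arc;

use common_meta::ddl_manager::DdlManager;
use common_meta::procedure_executor::{LocalProcedureExecutor, ProcedureExecutorRef};
use common_procedure::ProcedureManagerRef;

// Wrap the DDL manager and procedure manager into the executor that is handed to the
// frontend and flownode builders.
fn build_procedure_executor(
    ddl_manager: DdlManager,
    procedure_manager: ProcedureManagerRef,
) -> ProcedureExecutorRef {
    Arc::new(LocalProcedureExecutor::new(
        Arc::new(ddl_manager),
        procedure_manager,
    ))
}
```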
@@ -654,7 +662,7 @@ impl StartCommand {
|
||||
layered_cache_registry.clone(),
|
||||
catalog_manager.clone(),
|
||||
node_manager.clone(),
|
||||
ddl_task_executor.clone(),
|
||||
procedure_executor.clone(),
|
||||
process_manager,
|
||||
)
|
||||
.with_plugin(plugins.clone())
|
||||
@@ -679,7 +687,7 @@ impl StartCommand {
|
||||
catalog_manager.clone(),
|
||||
kv_backend.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
ddl_task_executor.clone(),
|
||||
procedure_executor,
|
||||
node_manager,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -34,6 +34,7 @@ use query::options::QueryOptions;
|
||||
use servers::export_metrics::ExportMetricsOption;
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::http::HttpOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use store_api::path_utils::WAL_DIR;
|
||||
|
||||
#[allow(deprecated)]
|
||||
@@ -190,6 +191,13 @@ fn test_load_metasrv_example_config() {
|
||||
remote_write: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
backend_tls: Some(TlsOption {
|
||||
mode: TlsMode::Prefer,
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
ca_cert_path: String::new(),
|
||||
watch: false,
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
@@ -245,6 +253,7 @@ fn test_load_flownode_example_config() {
|
||||
..Default::default()
|
||||
},
|
||||
user_provider: None,
|
||||
memory: Default::default(),
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
@@ -298,6 +307,7 @@ fn test_load_standalone_example_config() {
|
||||
cors_allowed_origins: vec!["https://example.com".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
|
||||
@@ -19,8 +19,8 @@ use std::io::BufReader;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use error::{
|
||||
BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
|
||||
ReadOptProfSnafu,
|
||||
ActivateProfSnafu, BuildTempPathSnafu, DeactivateProfSnafu, DumpProfileDataSnafu,
|
||||
OpenTempFileSnafu, ProfilingNotEnabledSnafu, ReadOptProfSnafu, ReadProfActiveSnafu,
|
||||
};
|
||||
use jemalloc_pprof_mappings::MAPPINGS;
|
||||
use jemalloc_pprof_utils::{parse_jeheap, FlamegraphOptions, StackProfile};
|
||||
@@ -31,6 +31,7 @@ use crate::error::{FlamegraphSnafu, ParseJeHeapSnafu, Result};
|
||||
|
||||
const PROF_DUMP: &[u8] = b"prof.dump\0";
|
||||
const OPT_PROF: &[u8] = b"opt.prof\0";
|
||||
const PROF_ACTIVE: &[u8] = b"prof.active\0";
|
||||
|
||||
pub async fn dump_profile() -> Result<Vec<u8>> {
|
||||
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
|
||||
@@ -93,6 +94,27 @@ pub async fn dump_flamegraph() -> Result<Vec<u8>> {
|
||||
let flamegraph = profile.to_flamegraph(&mut opts).context(FlamegraphSnafu)?;
|
||||
Ok(flamegraph)
|
||||
}
|
||||
|
||||
pub fn activate_heap_profile() -> Result<()> {
|
||||
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
|
||||
unsafe {
|
||||
tikv_jemalloc_ctl::raw::update(PROF_ACTIVE, true).context(ActivateProfSnafu)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn deactivate_heap_profile() -> Result<()> {
|
||||
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
|
||||
unsafe {
|
||||
tikv_jemalloc_ctl::raw::update(PROF_ACTIVE, false).context(DeactivateProfSnafu)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_heap_profile_active() -> Result<bool> {
|
||||
unsafe { Ok(tikv_jemalloc_ctl::raw::read::<bool>(PROF_ACTIVE).context(ReadProfActiveSnafu)?) }
|
||||
}
|
||||
|
||||
fn is_prof_enabled() -> Result<bool> {
|
||||
// safety: OPT_PROF variable, if present, is always a boolean value.
|
||||
Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
|
||||
|
||||
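Taken together with the existing dump helpers, the new `prof.active` wrappers let heap profiling be toggled at runtime. A minimal usage sketch, assuming the crate is consumed as `common_mem_prof`:

```rust
use common_mem_prof::{activate_heap_profile, deactivate_heap_profile, is_heap_profile_active};

// Flip the jemalloc `prof.active` flag; errors surface as common_mem_prof::error::Error,
// e.g. ProfilingNotEnabled when jemalloc was not started with `opt.prof` enabled.
fn toggle_heap_profiling() -> common_mem_prof::error::Result<bool> {
    if is_heap_profile_active()? {
        deactivate_heap_profile()?;
        Ok(false)
    } else {
        activate_heap_profile()?;
        Ok(true)
    }
}
```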
@@ -53,6 +53,24 @@ pub enum Error {
|
||||
#[snafu(source)]
|
||||
error: tikv_jemalloc_ctl::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to activate heap profiling"))]
|
||||
ActivateProf {
|
||||
#[snafu(source)]
|
||||
error: tikv_jemalloc_ctl::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to deactivate heap profiling"))]
|
||||
DeactivateProf {
|
||||
#[snafu(source)]
|
||||
error: tikv_jemalloc_ctl::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read heap profiling status"))]
|
||||
ReadProfActive {
|
||||
#[snafu(source)]
|
||||
error: tikv_jemalloc_ctl::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -63,6 +81,9 @@ impl ErrorExt for Error {
|
||||
Error::BuildTempPath { .. } => StatusCode::Internal,
|
||||
Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
|
||||
Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
|
||||
Error::ActivateProf { .. } => StatusCode::Internal,
|
||||
Error::DeactivateProf { .. } => StatusCode::Internal,
|
||||
Error::ReadProfActive { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,10 @@ pub mod error;
|
||||
#[cfg(not(windows))]
|
||||
mod jemalloc;
|
||||
#[cfg(not(windows))]
|
||||
pub use jemalloc::{dump_flamegraph, dump_pprof, dump_profile};
|
||||
pub use jemalloc::{
|
||||
activate_heap_profile, deactivate_heap_profile, dump_flamegraph, dump_pprof, dump_profile,
|
||||
is_heap_profile_active,
|
||||
};
|
||||
|
||||
#[cfg(windows)]
|
||||
pub async fn dump_profile() -> error::Result<Vec<u8>> {
|
||||
@@ -33,3 +36,18 @@ pub async fn dump_pprof() -> error::Result<Vec<u8>> {
|
||||
pub async fn dump_flamegraph() -> error::Result<Vec<u8>> {
|
||||
error::ProfilingNotSupportedSnafu.fail()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn activate_heap_profile() -> error::Result<()> {
|
||||
error::ProfilingNotSupportedSnafu.fail()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn deactivate_heap_profile() -> error::Result<()> {
|
||||
error::ProfilingNotSupportedSnafu.fail()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn is_heap_profile_active() -> error::Result<bool> {
|
||||
error::ProfilingNotSupportedSnafu.fail()
|
||||
}
|
||||
|
||||
@@ -6,7 +6,16 @@ license.workspace = true
|
||||
|
||||
[features]
|
||||
testing = []
|
||||
pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
|
||||
pg_kvbackend = [
|
||||
"dep:tokio-postgres",
|
||||
"dep:backon",
|
||||
"dep:deadpool-postgres",
|
||||
"dep:deadpool",
|
||||
"dep:tokio-postgres-rustls",
|
||||
"dep:rustls-pemfile",
|
||||
"dep:rustls-native-certs",
|
||||
"dep:rustls",
|
||||
]
|
||||
mysql_kvbackend = ["dep:sqlx", "dep:backon"]
|
||||
enterprise = []
|
||||
|
||||
@@ -57,6 +66,9 @@ prost.workspace = true
|
||||
rand.workspace = true
|
||||
regex.workspace = true
|
||||
rskafka.workspace = true
|
||||
rustls = { workspace = true, default-features = false, features = ["ring", "logging", "std", "tls12"], optional = true }
|
||||
rustls-native-certs = { version = "0.7", optional = true }
|
||||
rustls-pemfile = { version = "2.0", optional = true }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
@@ -68,6 +80,7 @@ strum.workspace = true
|
||||
table = { workspace = true, features = ["testing"] }
|
||||
tokio.workspace = true
|
||||
tokio-postgres = { workspace = true, optional = true }
|
||||
tokio-postgres-rustls = { version = "0.12", optional = true }
|
||||
tonic.workspace = true
|
||||
tracing.workspace = true
|
||||
typetag.workspace = true
|
||||
|
||||
@@ -15,25 +15,17 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::ProcedureDetailResponse;
|
||||
use common_telemetry::tracing_context::W3cTrace;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
|
||||
use crate::cache_invalidator::CacheInvalidatorRef;
|
||||
use crate::ddl::flow_meta::FlowMetadataAllocatorRef;
|
||||
use crate::ddl::table_meta::TableMetadataAllocatorRef;
|
||||
use crate::error::{Result, UnsupportedSnafu};
|
||||
use crate::key::flow::FlowMetadataManagerRef;
|
||||
use crate::key::table_route::PhysicalTableRouteValue;
|
||||
use crate::key::TableMetadataManagerRef;
|
||||
use crate::node_manager::NodeManagerRef;
|
||||
use crate::region_keeper::MemoryRegionKeeperRef;
|
||||
use crate::region_registry::LeaderRegionRegistryRef;
|
||||
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use crate::rpc::procedure::{
|
||||
AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse,
|
||||
RemoveRegionFollowerRequest,
|
||||
};
|
||||
use crate::DatanodeId;
|
||||
|
||||
pub mod alter_database;
|
||||
@@ -59,64 +51,6 @@ pub(crate) mod tests;
|
||||
pub mod truncate_table;
|
||||
pub mod utils;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ExecutorContext {
|
||||
pub tracing_context: Option<W3cTrace>,
|
||||
}
|
||||
|
||||
/// The procedure executor that accepts ddl, region migration task etc.
|
||||
#[async_trait::async_trait]
|
||||
pub trait ProcedureExecutor: Send + Sync {
|
||||
/// Submit a ddl task
|
||||
async fn submit_ddl_task(
|
||||
&self,
|
||||
ctx: &ExecutorContext,
|
||||
request: SubmitDdlTaskRequest,
|
||||
) -> Result<SubmitDdlTaskResponse>;
|
||||
|
||||
/// Add a region follower
|
||||
async fn add_region_follower(
|
||||
&self,
|
||||
_ctx: &ExecutorContext,
|
||||
_request: AddRegionFollowerRequest,
|
||||
) -> Result<()> {
|
||||
UnsupportedSnafu {
|
||||
operation: "add_region_follower",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
/// Remove a region follower
|
||||
async fn remove_region_follower(
|
||||
&self,
|
||||
_ctx: &ExecutorContext,
|
||||
_request: RemoveRegionFollowerRequest,
|
||||
) -> Result<()> {
|
||||
UnsupportedSnafu {
|
||||
operation: "remove_region_follower",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
/// Submit a region migration task
|
||||
async fn migrate_region(
|
||||
&self,
|
||||
ctx: &ExecutorContext,
|
||||
request: MigrateRegionRequest,
|
||||
) -> Result<MigrateRegionResponse>;
|
||||
|
||||
/// Query the procedure state by its id
|
||||
async fn query_procedure_state(
|
||||
&self,
|
||||
ctx: &ExecutorContext,
|
||||
pid: &str,
|
||||
) -> Result<ProcedureStateResponse>;
|
||||
|
||||
async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
|
||||
}
|
||||
|
||||
pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
|
||||
|
||||
/// Metadata allocated to a table.
|
||||
#[derive(Default)]
|
||||
pub struct TableMetadata {
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::ProcedureDetailResponse;
|
||||
use common_procedure::{
|
||||
watcher, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId,
|
||||
};
|
||||
@@ -37,16 +36,16 @@ use crate::ddl::drop_flow::DropFlowProcedure;
|
||||
use crate::ddl::drop_table::DropTableProcedure;
|
||||
use crate::ddl::drop_view::DropViewProcedure;
|
||||
use crate::ddl::truncate_table::TruncateTableProcedure;
|
||||
use crate::ddl::{utils, DdlContext, ExecutorContext, ProcedureExecutor};
|
||||
use crate::ddl::{utils, DdlContext};
|
||||
use crate::error::{
|
||||
EmptyDdlTasksSnafu, ParseProcedureIdSnafu, ProcedureNotFoundSnafu, ProcedureOutputSnafu,
|
||||
QueryProcedureSnafu, RegisterProcedureLoaderSnafu, Result, SubmitProcedureSnafu,
|
||||
TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
|
||||
UnexpectedLogicalRouteTableSnafu, UnsupportedSnafu, WaitProcedureSnafu,
|
||||
EmptyDdlTasksSnafu, ProcedureOutputSnafu, RegisterProcedureLoaderSnafu, Result,
|
||||
SubmitProcedureSnafu, TableInfoNotFoundSnafu, TableNotFoundSnafu, TableRouteNotFoundSnafu,
|
||||
UnexpectedLogicalRouteTableSnafu, WaitProcedureSnafu,
|
||||
};
|
||||
use crate::key::table_info::TableInfoValue;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
|
||||
use crate::procedure_executor::ExecutorContext;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::rpc::ddl::trigger::CreateTriggerTask;
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -65,8 +64,6 @@ use crate::rpc::ddl::{
|
||||
CreateViewTask, DropDatabaseTask, DropFlowTask, DropTableTask, DropViewTask, QueryContext,
|
||||
SubmitDdlTaskRequest, SubmitDdlTaskResponse, TruncateTableTask,
|
||||
};
|
||||
use crate::rpc::procedure;
|
||||
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
|
||||
use crate::rpc::router::RegionRoute;
|
||||
|
||||
pub type DdlManagerRef = Arc<DdlManager>;
|
||||
@@ -418,6 +415,75 @@ impl DdlManager {
|
||||
|
||||
Ok((procedure_id, output))
|
||||
}
|
||||
|
||||
pub async fn submit_ddl_task(
|
||||
&self,
|
||||
ctx: &ExecutorContext,
|
||||
request: SubmitDdlTaskRequest,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let span = ctx
|
||||
.tracing_context
|
||||
.as_ref()
|
||||
.map(TracingContext::from_w3c)
|
||||
.unwrap_or_else(TracingContext::from_current_span)
|
||||
.attach(tracing::info_span!("DdlManager::submit_ddl_task"));
|
||||
async move {
|
||||
debug!("Submitting Ddl task: {:?}", request.task);
|
||||
match request.task {
|
||||
CreateTable(create_table_task) => {
|
||||
handle_create_table_task(self, create_table_task).await
|
||||
}
|
||||
DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
|
||||
AlterTable(alter_table_task) => {
|
||||
handle_alter_table_task(self, alter_table_task).await
|
||||
}
|
||||
TruncateTable(truncate_table_task) => {
|
||||
handle_truncate_table_task(self, truncate_table_task).await
|
||||
}
|
||||
CreateLogicalTables(create_table_tasks) => {
|
||||
handle_create_logical_table_tasks(self, create_table_tasks).await
|
||||
}
|
||||
AlterLogicalTables(alter_table_tasks) => {
|
||||
handle_alter_logical_table_tasks(self, alter_table_tasks).await
|
||||
}
|
||||
DropLogicalTables(_) => todo!(),
|
||||
CreateDatabase(create_database_task) => {
|
||||
handle_create_database_task(self, create_database_task).await
|
||||
}
|
||||
DropDatabase(drop_database_task) => {
|
||||
handle_drop_database_task(self, drop_database_task).await
|
||||
}
|
||||
AlterDatabase(alter_database_task) => {
|
||||
handle_alter_database_task(self, alter_database_task).await
|
||||
}
|
||||
CreateFlow(create_flow_task) => {
|
||||
handle_create_flow_task(self, create_flow_task, request.query_context.into())
|
||||
.await
|
||||
}
|
||||
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
|
||||
CreateView(create_view_task) => {
|
||||
handle_create_view_task(self, create_view_task).await
|
||||
}
|
||||
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
|
||||
#[cfg(feature = "enterprise")]
|
||||
CreateTrigger(create_trigger_task) => {
|
||||
handle_create_trigger_task(
|
||||
self,
|
||||
create_trigger_task,
|
||||
request.query_context.into(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
#[cfg(feature = "enterprise")]
|
||||
DropTrigger(drop_trigger_task) => {
|
||||
handle_drop_trigger_task(self, drop_trigger_task, request.query_context.into())
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
.trace(span)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_truncate_table_task(
|
||||
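With `submit_ddl_task` now an inherent method (the blanket `ProcedureExecutor` impl is removed further down), callers hold a `DdlManager` and invoke it directly. A minimal sketch; the `SubmitDdlTaskRequest` construction is elided because its fields are not shown in this diff:

```rust
use common_meta::ddl_manager::DdlManager;
use common_meta::procedure_executor::ExecutorContext;
use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};

// Submit a DDL task without an explicit tracing context; DdlManager falls back to the
// current tracing span when `tracing_context` is `None`.
async fn submit(
    ddl_manager: &DdlManager,
    request: SubmitDdlTaskRequest,
) -> common_meta::error::Result<SubmitDdlTaskResponse> {
    ddl_manager
        .submit_ddl_task(&ExecutorContext::default(), request)
        .await
}
```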
@@ -667,6 +733,8 @@ async fn handle_drop_trigger_task(
|
||||
query_context: QueryContext,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
|
||||
use crate::error::UnsupportedSnafu;
|
||||
|
||||
return UnsupportedSnafu {
|
||||
operation: "drop trigger",
|
||||
}
|
||||
@@ -746,6 +814,8 @@ async fn handle_create_trigger_task(
|
||||
query_context: QueryContext,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
|
||||
use crate::error::UnsupportedSnafu;
|
||||
|
||||
return UnsupportedSnafu {
|
||||
operation: "create trigger",
|
||||
}
|
||||
@@ -822,119 +892,6 @@ async fn handle_create_view_task(
|
||||
})
|
||||
}
|
||||
|
||||
/// TODO(dennis): let [`DdlManager`] implement [`ProcedureExecutor`] looks weird, find some way to refactor it.
|
||||
#[async_trait::async_trait]
|
||||
impl ProcedureExecutor for DdlManager {
|
||||
async fn submit_ddl_task(
|
||||
&self,
|
||||
ctx: &ExecutorContext,
|
||||
request: SubmitDdlTaskRequest,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let span = ctx
|
||||
.tracing_context
|
||||
.as_ref()
|
||||
.map(TracingContext::from_w3c)
|
||||
.unwrap_or(TracingContext::from_current_span())
|
||||
.attach(tracing::info_span!("DdlManager::submit_ddl_task"));
|
||||
async move {
|
||||
debug!("Submitting Ddl task: {:?}", request.task);
|
||||
match request.task {
|
||||
CreateTable(create_table_task) => {
|
||||
handle_create_table_task(self, create_table_task).await
|
||||
}
|
||||
DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
|
||||
AlterTable(alter_table_task) => {
|
||||
handle_alter_table_task(self, alter_table_task).await
|
||||
}
|
||||
TruncateTable(truncate_table_task) => {
|
||||
handle_truncate_table_task(self, truncate_table_task).await
|
||||
}
|
||||
CreateLogicalTables(create_table_tasks) => {
|
||||
handle_create_logical_table_tasks(self, create_table_tasks).await
|
||||
}
|
||||
AlterLogicalTables(alter_table_tasks) => {
|
||||
handle_alter_logical_table_tasks(self, alter_table_tasks).await
|
||||
}
|
||||
DropLogicalTables(_) => todo!(),
|
||||
CreateDatabase(create_database_task) => {
|
||||
handle_create_database_task(self, create_database_task).await
|
||||
}
|
||||
DropDatabase(drop_database_task) => {
|
||||
handle_drop_database_task(self, drop_database_task).await
|
||||
}
|
||||
AlterDatabase(alter_database_task) => {
|
||||
handle_alter_database_task(self, alter_database_task).await
|
||||
}
|
||||
CreateFlow(create_flow_task) => {
|
||||
handle_create_flow_task(self, create_flow_task, request.query_context.into())
|
||||
.await
|
||||
}
|
||||
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
|
||||
CreateView(create_view_task) => {
|
||||
handle_create_view_task(self, create_view_task).await
|
||||
}
|
||||
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
|
||||
#[cfg(feature = "enterprise")]
|
||||
CreateTrigger(create_trigger_task) => {
|
||||
handle_create_trigger_task(
|
||||
self,
|
||||
create_trigger_task,
|
||||
request.query_context.into(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
#[cfg(feature = "enterprise")]
|
||||
DropTrigger(drop_trigger_task) => {
|
||||
handle_drop_trigger_task(self, drop_trigger_task, request.query_context.into())
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
.trace(span)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn migrate_region(
|
||||
&self,
|
||||
_ctx: &ExecutorContext,
|
||||
_request: MigrateRegionRequest,
|
||||
) -> Result<MigrateRegionResponse> {
|
||||
UnsupportedSnafu {
|
||||
operation: "migrate_region",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn query_procedure_state(
|
||||
&self,
|
||||
_ctx: &ExecutorContext,
|
||||
pid: &str,
|
||||
) -> Result<ProcedureStateResponse> {
|
||||
let pid =
|
||||
ProcedureId::parse_str(pid).with_context(|_| ParseProcedureIdSnafu { key: pid })?;
|
||||
|
||||
let state = self
|
||||
.procedure_manager
|
||||
.procedure_state(pid)
|
||||
.await
|
||||
.context(QueryProcedureSnafu)?
|
||||
.context(ProcedureNotFoundSnafu {
|
||||
pid: pid.to_string(),
|
||||
})?;
|
||||
|
||||
Ok(procedure::procedure_state_to_pb_response(&state))
|
||||
}
|
||||
|
||||
async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
|
||||
let metas = self
|
||||
.procedure_manager
|
||||
.list_procedures()
|
||||
.await
|
||||
.context(QueryProcedureSnafu)?;
|
||||
Ok(procedure::procedure_details_to_pb_response(metas))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -403,6 +403,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Catalog not found, catalog: {}", catalog))]
|
||||
CatalogNotFound {
|
||||
catalog: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid metadata, err: {}", err_msg))]
|
||||
InvalidMetadata {
|
||||
err_msg: String,
|
||||
@@ -733,6 +740,32 @@ pub enum Error {
|
||||
operation: String,
|
||||
},
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[snafu(display("Failed to setup PostgreSQL TLS configuration: {}", reason))]
|
||||
PostgresTlsConfig {
|
||||
reason: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[snafu(display("Failed to load TLS certificate from path: {}", path))]
|
||||
LoadTlsCertificate {
|
||||
path: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[snafu(display("Invalid TLS configuration: {}", reason))]
|
||||
InvalidTlsConfig {
|
||||
reason: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
#[snafu(display("Failed to execute via MySql, sql: {}", sql))]
|
||||
MySqlExecution {
|
||||
@@ -1062,6 +1095,7 @@ impl ErrorExt for Error {
|
||||
ParseProcedureId { .. }
|
||||
| InvalidNumTopics { .. }
|
||||
| SchemaNotFound { .. }
|
||||
| CatalogNotFound { .. }
|
||||
| InvalidNodeInfoKey { .. }
|
||||
| InvalidStatKey { .. }
|
||||
| ParseNum { .. }
|
||||
@@ -1072,7 +1106,10 @@ impl ErrorExt for Error {
|
||||
PostgresExecution { .. }
|
||||
| CreatePostgresPool { .. }
|
||||
| GetPostgresConnection { .. }
|
||||
| PostgresTransaction { .. } => StatusCode::Internal,
|
||||
| PostgresTransaction { .. }
|
||||
| PostgresTlsConfig { .. }
|
||||
| LoadTlsCertificate { .. }
|
||||
| InvalidTlsConfig { .. } => StatusCode::Internal,
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
MySqlExecution { .. } | CreateMySqlPool { .. } | MySqlTransaction { .. } => {
|
||||
StatusCode::Internal
|
||||
|
||||
@@ -40,7 +40,7 @@ const RDS_STORE_OP_RANGE_DELETE: &str = "range_delete";
|
||||
const RDS_STORE_OP_BATCH_DELETE: &str = "batch_delete";
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
mod postgres;
|
||||
pub mod postgres;
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
pub use postgres::PgStore;
|
||||
|
||||
@@ -118,7 +118,7 @@ impl<T: Executor> ExecutorImpl<'_, T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[warn(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
|
||||
#[allow(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
|
||||
async fn execute(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<()> {
|
||||
match self {
|
||||
Self::Default(executor) => executor.execute(query, params).await,
|
||||
|
||||
@@ -12,19 +12,29 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use deadpool_postgres::{Config, Pool, Runtime};
|
||||
use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier};
|
||||
use rustls::pki_types::{CertificateDer, ServerName, UnixTime};
|
||||
use rustls::server::ParsedCertificate;
|
||||
// TLS-related imports (feature-gated)
|
||||
use rustls::ClientConfig;
|
||||
use rustls::{DigitallySignedStruct, Error as TlsError, SignatureScheme};
|
||||
use rustls_pemfile::{certs, private_key};
|
||||
use snafu::ResultExt;
|
||||
use strum::AsRefStr;
|
||||
use tokio_postgres::types::ToSql;
|
||||
use tokio_postgres::{IsolationLevel, NoTls, Row};
|
||||
use tokio_postgres_rustls::MakeRustlsConnect;
|
||||
|
||||
use crate::error::{
|
||||
CreatePostgresPoolSnafu, GetPostgresConnectionSnafu, PostgresExecutionSnafu,
|
||||
PostgresTransactionSnafu, Result,
|
||||
CreatePostgresPoolSnafu, GetPostgresConnectionSnafu, LoadTlsCertificateSnafu,
|
||||
PostgresExecutionSnafu, PostgresTlsConfigSnafu, PostgresTransactionSnafu, Result,
|
||||
};
|
||||
use crate::kv_backend::rds::{
|
||||
Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
|
||||
@@ -38,6 +48,41 @@ use crate::rpc::store::{
|
||||
};
|
||||
use crate::rpc::KeyValue;
|
||||
|
||||
/// TLS mode configuration for PostgreSQL connections.
|
||||
/// This mirrors the TlsMode from servers::tls to avoid circular dependencies.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default)]
|
||||
pub enum TlsMode {
|
||||
Disable,
|
||||
#[default]
|
||||
Prefer,
|
||||
Require,
|
||||
VerifyCa,
|
||||
VerifyFull,
|
||||
}
|
||||
|
||||
/// TLS configuration for PostgreSQL connections.
|
||||
/// This mirrors the TlsOption from servers::tls to avoid circular dependencies.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TlsOption {
|
||||
pub mode: TlsMode,
|
||||
pub cert_path: String,
|
||||
pub key_path: String,
|
||||
pub ca_cert_path: String,
|
||||
pub watch: bool,
|
||||
}
|
||||
|
||||
impl Default for TlsOption {
|
||||
fn default() -> Self {
|
||||
TlsOption {
|
||||
mode: TlsMode::Prefer,
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
ca_cert_path: String::new(),
|
||||
watch: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const PG_STORE_NAME: &str = "pg_store";
|
||||
|
||||
pub struct PgClient(deadpool::managed::Object<deadpool_postgres::Manager>);
|
||||
@@ -348,6 +393,265 @@ impl ExecutorFactory<PgClient> for PgExecutorFactory {
|
||||
/// It uses [deadpool_postgres::Pool] as the connection pool for [RdsStore].
|
||||
pub type PgStore = RdsStore<PgClient, PgExecutorFactory, PgSqlTemplateSet>;
|
||||
|
||||
/// Creates a PostgreSQL TLS connector based on the provided configuration.
|
||||
///
|
||||
/// This function creates a rustls-based TLS connector for PostgreSQL connections,
|
||||
/// following PostgreSQL's TLS mode specifications exactly:
|
||||
///
|
||||
/// # TLS Modes (PostgreSQL Specification)
|
||||
///
|
||||
/// - `Disable`: No TLS connection attempted
|
||||
/// - `Prefer`: Try TLS first, fallback to plaintext if TLS fails (handled by connection logic)
|
||||
/// - `Require`: Only TLS connections, but NO certificate verification (accept any cert)
|
||||
/// - `VerifyCa`: TLS + verify certificate is signed by trusted CA (no hostname verification)
|
||||
/// - `VerifyFull`: TLS + verify CA + verify hostname matches certificate SAN
|
||||
///
|
||||
pub fn create_postgres_tls_connector(tls_config: &TlsOption) -> Result<MakeRustlsConnect> {
|
||||
common_telemetry::info!(
|
||||
"Creating PostgreSQL TLS connector with mode: {:?}",
|
||||
tls_config.mode
|
||||
);
|
||||
|
||||
let config_builder = match tls_config.mode {
|
||||
TlsMode::Disable => {
|
||||
return PostgresTlsConfigSnafu {
|
||||
reason: "Cannot create TLS connector for Disable mode".to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
TlsMode::Prefer | TlsMode::Require => {
|
||||
// For Prefer/Require: Accept any certificate (no verification)
|
||||
let verifier = Arc::new(AcceptAnyVerifier);
|
||||
ClientConfig::builder()
|
||||
.dangerous()
|
||||
.with_custom_certificate_verifier(verifier)
|
||||
}
|
||||
TlsMode::VerifyCa => {
|
||||
// For VerifyCa: Verify server cert against CA store, but skip hostname verification
|
||||
let ca_store = load_ca(&tls_config.ca_cert_path)?;
|
||||
let verifier = Arc::new(NoHostnameVerification { roots: ca_store });
|
||||
ClientConfig::builder()
|
||||
.dangerous()
|
||||
.with_custom_certificate_verifier(verifier)
|
||||
}
|
||||
TlsMode::VerifyFull => {
|
||||
let ca_store = load_ca(&tls_config.ca_cert_path)?;
|
||||
ClientConfig::builder().with_root_certificates(ca_store)
|
||||
}
|
||||
};
|
||||
|
||||
// Create the TLS client configuration based on the mode and client cert requirements
|
||||
let client_config = if !tls_config.cert_path.is_empty() && !tls_config.key_path.is_empty() {
|
||||
// Client certificate authentication required
|
||||
common_telemetry::info!("Loading client certificate for mutual TLS");
|
||||
let cert_chain = load_certs(&tls_config.cert_path)?;
|
||||
let private_key = load_private_key(&tls_config.key_path)?;
|
||||
|
||||
config_builder
|
||||
.with_client_auth_cert(cert_chain, private_key)
|
||||
.map_err(|e| {
|
||||
PostgresTlsConfigSnafu {
|
||||
reason: format!("Failed to configure client authentication: {}", e),
|
||||
}
|
||||
.build()
|
||||
})?
|
||||
} else {
|
||||
common_telemetry::info!("No client certificate provided, skip client authentication");
|
||||
config_builder.with_no_client_auth()
|
||||
};
|
||||
|
||||
common_telemetry::info!("Successfully created PostgreSQL TLS connector");
|
||||
Ok(MakeRustlsConnect::new(client_config))
|
||||
}
|
||||
|
||||
/// For Prefer/Require mode, we accept any server certificate without verification.
|
||||
#[derive(Debug)]
|
||||
struct AcceptAnyVerifier;
|
||||
|
||||
impl ServerCertVerifier for AcceptAnyVerifier {
|
||||
fn verify_server_cert(
|
||||
&self,
|
||||
_end_entity: &CertificateDer<'_>,
|
||||
_intermediates: &[CertificateDer<'_>],
|
||||
_server_name: &ServerName<'_>,
|
||||
_ocsp_response: &[u8],
|
||||
_now: UnixTime,
|
||||
) -> std::result::Result<ServerCertVerified, TlsError> {
|
||||
common_telemetry::debug!(
|
||||
"Accepting server certificate without verification (Prefer/Require mode)"
|
||||
);
|
||||
Ok(ServerCertVerified::assertion())
|
||||
}
|
||||
|
||||
fn verify_tls12_signature(
|
||||
&self,
|
||||
_message: &[u8],
|
||||
_cert: &CertificateDer<'_>,
|
||||
_dss: &DigitallySignedStruct,
|
||||
) -> std::result::Result<HandshakeSignatureValid, TlsError> {
|
||||
// Accept any signature without verification
|
||||
Ok(HandshakeSignatureValid::assertion())
|
||||
}
|
||||
|
||||
fn verify_tls13_signature(
|
||||
&self,
|
||||
_message: &[u8],
|
||||
_cert: &CertificateDer<'_>,
|
||||
_dss: &DigitallySignedStruct,
|
||||
) -> std::result::Result<HandshakeSignatureValid, TlsError> {
|
||||
// Accept any signature without verification
|
||||
Ok(HandshakeSignatureValid::assertion())
|
||||
}
|
||||
|
||||
fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
|
||||
// Support all signature schemes
|
||||
rustls::crypto::ring::default_provider()
|
||||
.signature_verification_algorithms
|
||||
.supported_schemes()
|
||||
}
|
||||
}
|
||||
|
||||
/// For VerifyCa mode, we verify the server certificate against our CA store
|
||||
/// and skip verifying the server's hostname.
|
||||
#[derive(Debug)]
|
||||
struct NoHostnameVerification {
|
||||
roots: Arc<rustls::RootCertStore>,
|
||||
}
|
||||
|
||||
impl ServerCertVerifier for NoHostnameVerification {
|
||||
fn verify_server_cert(
|
||||
&self,
|
||||
end_entity: &CertificateDer<'_>,
|
||||
intermediates: &[CertificateDer<'_>],
|
||||
_server_name: &ServerName<'_>,
|
||||
_ocsp_response: &[u8],
|
||||
now: UnixTime,
|
||||
) -> std::result::Result<ServerCertVerified, TlsError> {
|
||||
let cert = ParsedCertificate::try_from(end_entity)?;
|
||||
rustls::client::verify_server_cert_signed_by_trust_anchor(
|
||||
&cert,
|
||||
&self.roots,
|
||||
intermediates,
|
||||
now,
|
||||
rustls::crypto::ring::default_provider()
|
||||
.signature_verification_algorithms
|
||||
.all,
|
||||
)?;
|
||||
|
||||
Ok(ServerCertVerified::assertion())
|
||||
}
|
||||
|
||||
fn verify_tls12_signature(
|
||||
&self,
|
||||
message: &[u8],
|
||||
cert: &CertificateDer<'_>,
|
||||
dss: &DigitallySignedStruct,
|
||||
) -> std::result::Result<HandshakeSignatureValid, TlsError> {
|
||||
rustls::crypto::verify_tls12_signature(
|
||||
message,
|
||||
cert,
|
||||
dss,
|
||||
&rustls::crypto::ring::default_provider().signature_verification_algorithms,
|
||||
)
|
||||
}
|
||||
|
||||
fn verify_tls13_signature(
|
||||
&self,
|
||||
message: &[u8],
|
||||
cert: &CertificateDer<'_>,
|
||||
dss: &DigitallySignedStruct,
|
||||
) -> std::result::Result<HandshakeSignatureValid, TlsError> {
|
||||
rustls::crypto::verify_tls13_signature(
|
||||
message,
|
||||
cert,
|
||||
dss,
|
||||
&rustls::crypto::ring::default_provider().signature_verification_algorithms,
|
||||
)
|
||||
}
|
||||
|
||||
fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
|
||||
// Support all signature schemes
|
||||
rustls::crypto::ring::default_provider()
|
||||
.signature_verification_algorithms
|
||||
.supported_schemes()
|
||||
}
|
||||
}
|
||||
|
||||
fn load_certs(path: &str) -> Result<Vec<rustls::pki_types::CertificateDer<'static>>> {
|
||||
let file = File::open(path).context(LoadTlsCertificateSnafu { path })?;
|
||||
let mut reader = BufReader::new(file);
|
||||
let certs = certs(&mut reader)
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.map_err(|e| {
|
||||
PostgresTlsConfigSnafu {
|
||||
reason: format!("Failed to parse certificates from {}: {}", path, e),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
Ok(certs)
|
||||
}
|
||||
|
||||
fn load_private_key(path: &str) -> Result<rustls::pki_types::PrivateKeyDer<'static>> {
|
||||
let file = File::open(path).context(LoadTlsCertificateSnafu { path })?;
|
||||
let mut reader = BufReader::new(file);
|
||||
let key = private_key(&mut reader)
|
||||
.map_err(|e| {
|
||||
PostgresTlsConfigSnafu {
|
||||
reason: format!("Failed to parse private key from {}: {}", path, e),
|
||||
}
|
||||
.build()
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
PostgresTlsConfigSnafu {
|
||||
reason: format!("No private key found in {}", path),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
Ok(key)
|
||||
}
|
||||
|
||||
fn load_ca(path: &str) -> Result<Arc<rustls::RootCertStore>> {
|
||||
let mut root_store = rustls::RootCertStore::empty();
|
||||
|
||||
// Add system root certificates
|
||||
match rustls_native_certs::load_native_certs() {
|
||||
Ok(certs) => {
|
||||
let num_certs = certs.len();
|
||||
for cert in certs {
|
||||
if let Err(e) = root_store.add(cert) {
|
||||
return PostgresTlsConfigSnafu {
|
||||
reason: format!("Failed to add root certificate: {}", e),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
common_telemetry::info!("Loaded {num_certs} system root certificates successfully");
|
||||
}
|
||||
Err(e) => {
|
||||
return PostgresTlsConfigSnafu {
|
||||
reason: format!("Failed to load system root certificates: {}", e),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
// Try to add a custom CA certificate if provided
|
||||
if !path.is_empty() {
|
||||
let ca_certs = load_certs(path)?;
|
||||
for cert in ca_certs {
|
||||
if let Err(e) = root_store.add(cert) {
|
||||
return PostgresTlsConfigSnafu {
|
||||
reason: format!("Failed to add custom CA certificate: {}", e),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
common_telemetry::info!("Added custom CA certificate from {}", path);
|
||||
}
|
||||
|
||||
Ok(Arc::new(root_store))
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvQueryExecutor<PgClient> for PgStore {
|
||||
async fn range_with_query_executor(
|
||||
@@ -491,17 +795,54 @@ impl KvQueryExecutor<PgClient> for PgStore {
|
||||
}
|
||||
|
||||
impl PgStore {
|
||||
/// Create [PgStore] impl of [KvBackendRef] from url.
|
||||
pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
|
||||
/// Create [PgStore] impl of [KvBackendRef] from url with optional TLS support.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `url` - PostgreSQL connection URL
|
||||
/// * `table_name` - Name of the table to use for key-value storage
|
||||
/// * `max_txn_ops` - Maximum number of operations per transaction
|
||||
/// * `tls_config` - Optional TLS configuration. If None, uses plaintext connection.
|
||||
pub async fn with_url_and_tls(
|
||||
url: &str,
|
||||
table_name: &str,
|
||||
max_txn_ops: usize,
|
||||
tls_config: Option<TlsOption>,
|
||||
) -> Result<KvBackendRef> {
|
||||
let mut cfg = Config::new();
|
||||
cfg.url = Some(url.to_string());
|
||||
// TODO(weny, CookiePie): add tls support
|
||||
let pool = cfg
|
||||
.create_pool(Some(Runtime::Tokio1), NoTls)
|
||||
.context(CreatePostgresPoolSnafu)?;
|
||||
|
||||
let pool = match tls_config {
|
||||
Some(tls_config) if tls_config.mode != TlsMode::Disable => {
|
||||
match create_postgres_tls_connector(&tls_config) {
|
||||
Ok(tls_connector) => cfg
|
||||
.create_pool(Some(Runtime::Tokio1), tls_connector)
|
||||
.context(CreatePostgresPoolSnafu)?,
|
||||
Err(e) => {
|
||||
if tls_config.mode == TlsMode::Prefer {
|
||||
// Fallback to insecure connection if TLS fails
|
||||
common_telemetry::info!("Failed to create TLS connector, falling back to insecure connection");
|
||||
cfg.create_pool(Some(Runtime::Tokio1), NoTls)
|
||||
.context(CreatePostgresPoolSnafu)?
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => cfg
|
||||
.create_pool(Some(Runtime::Tokio1), NoTls)
|
||||
.context(CreatePostgresPoolSnafu)?,
|
||||
};
|
||||
|
||||
Self::with_pg_pool(pool, table_name, max_txn_ops).await
|
||||
}
|
||||
|
||||
/// Create [PgStore] impl of [KvBackendRef] from url (backward compatibility).
|
||||
pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
|
||||
Self::with_url_and_tls(url, table_name, max_txn_ops, None).await
|
||||
}
|
||||
|
||||
/// Create [PgStore] impl of [KvBackendRef] from [deadpool_postgres::Pool].
|
||||
pub async fn with_pg_pool(
|
||||
pool: Pool,
|
||||
|
||||
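For reference, a hedged sketch of the new TLS-aware constructor; the connection URL, table name, transaction limit, and CA path below are placeholders, and the `pg_kvbackend` feature is assumed to be enabled:

```rust
use common_meta::kv_backend::rds::postgres::{TlsMode, TlsOption};
use common_meta::kv_backend::rds::PgStore;
use common_meta::kv_backend::KvBackendRef;

// URL, table name, and CA path are placeholders, not values taken from this diff.
async fn pg_backend_with_tls() -> common_meta::error::Result<KvBackendRef> {
    let tls = TlsOption {
        mode: TlsMode::VerifyFull,
        ca_cert_path: "/etc/greptimedb/pg-ca.pem".to_string(),
        ..Default::default()
    };
    // Passing `None` keeps the old plaintext behaviour; `with_url` now simply forwards here.
    PgStore::with_url_and_tls(
        "postgres://user:password@127.0.0.1:5432/metadata",
        "greptime_metakv",
        128,
        Some(tls),
    )
    .await
}
```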
@@ -37,6 +37,7 @@ pub mod node_expiry_listener;
|
||||
pub mod node_manager;
|
||||
pub mod peer;
|
||||
pub mod poison_key;
|
||||
pub mod procedure_executor;
|
||||
pub mod range_stream;
|
||||
pub mod reconciliation;
|
||||
pub mod region_keeper;
|
||||
|
||||
src/common/meta/src/procedure_executor.rs (new file, 155 lines)
@@ -0,0 +1,155 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::meta::ProcedureDetailResponse;
use common_procedure::{ProcedureId, ProcedureManagerRef};
use common_telemetry::tracing_context::W3cTrace;
use snafu::{OptionExt, ResultExt};

use crate::ddl_manager::DdlManagerRef;
use crate::error::{
    ParseProcedureIdSnafu, ProcedureNotFoundSnafu, QueryProcedureSnafu, Result, UnsupportedSnafu,
};
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use crate::rpc::procedure::{
    self, AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse,
    ProcedureStateResponse, RemoveRegionFollowerRequest,
};

/// The context of procedure executor.
#[derive(Debug, Default)]
pub struct ExecutorContext {
    pub tracing_context: Option<W3cTrace>,
}

/// The procedure executor that accepts ddl, region migration task etc.
#[async_trait::async_trait]
pub trait ProcedureExecutor: Send + Sync {
    /// Submit a ddl task
    async fn submit_ddl_task(
        &self,
        ctx: &ExecutorContext,
        request: SubmitDdlTaskRequest,
    ) -> Result<SubmitDdlTaskResponse>;

    /// Add a region follower
    async fn add_region_follower(
        &self,
        _ctx: &ExecutorContext,
        _request: AddRegionFollowerRequest,
    ) -> Result<()> {
        UnsupportedSnafu {
            operation: "add_region_follower",
        }
        .fail()
    }

    /// Remove a region follower
    async fn remove_region_follower(
        &self,
        _ctx: &ExecutorContext,
        _request: RemoveRegionFollowerRequest,
    ) -> Result<()> {
        UnsupportedSnafu {
            operation: "remove_region_follower",
        }
        .fail()
    }

    /// Submit a region migration task
    async fn migrate_region(
        &self,
        ctx: &ExecutorContext,
        request: MigrateRegionRequest,
    ) -> Result<MigrateRegionResponse>;

    /// Query the procedure state by its id
    async fn query_procedure_state(
        &self,
        ctx: &ExecutorContext,
        pid: &str,
    ) -> Result<ProcedureStateResponse>;

    async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
}

pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;

/// The local procedure executor that accepts ddl, region migration task etc.
pub struct LocalProcedureExecutor {
    pub ddl_manager: DdlManagerRef,
    pub procedure_manager: ProcedureManagerRef,
}

impl LocalProcedureExecutor {
    pub fn new(ddl_manager: DdlManagerRef, procedure_manager: ProcedureManagerRef) -> Self {
        Self {
            ddl_manager,
            procedure_manager,
        }
    }
}

#[async_trait::async_trait]
impl ProcedureExecutor for LocalProcedureExecutor {
    async fn submit_ddl_task(
        &self,
        ctx: &ExecutorContext,
        request: SubmitDdlTaskRequest,
    ) -> Result<SubmitDdlTaskResponse> {
        self.ddl_manager.submit_ddl_task(ctx, request).await
    }

    async fn migrate_region(
        &self,
        _ctx: &ExecutorContext,
        _request: MigrateRegionRequest,
    ) -> Result<MigrateRegionResponse> {
        UnsupportedSnafu {
            operation: "migrate_region",
        }
        .fail()
    }

    async fn query_procedure_state(
        &self,
        _ctx: &ExecutorContext,
        pid: &str,
    ) -> Result<ProcedureStateResponse> {
        let pid =
            ProcedureId::parse_str(pid).with_context(|_| ParseProcedureIdSnafu { key: pid })?;

        let state = self
            .procedure_manager
            .procedure_state(pid)
            .await
            .context(QueryProcedureSnafu)?
            .with_context(|| ProcedureNotFoundSnafu {
                pid: pid.to_string(),
            })?;

        Ok(procedure::procedure_state_to_pb_response(&state))
    }

    async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
        let metas = self
            .procedure_manager
            .list_procedures()
            .await
            .context(QueryProcedureSnafu)?;
        Ok(procedure::procedure_details_to_pb_response(metas))
    }
}
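To illustrate how the new trait is meant to be consumed, here is a hedged sketch that wires a `LocalProcedureExecutor` behind the `ProcedureExecutorRef` alias and queries a procedure's state; the `ddl_manager` and `procedure_manager` values are assumed to come from the existing metasrv builder, and the logging call assumes `ProcedureStateResponse` implements `Debug`.

// Sketch only: construct the local executor and use it through the trait object.
fn make_executor(
    ddl_manager: DdlManagerRef,
    procedure_manager: ProcedureManagerRef,
) -> ProcedureExecutorRef {
    Arc::new(LocalProcedureExecutor::new(ddl_manager, procedure_manager))
}

async fn print_state(executor: &ProcedureExecutorRef, pid: &str) -> Result<()> {
    let ctx = ExecutorContext::default();
    let state = executor.query_procedure_state(&ctx, pid).await?;
    common_telemetry::info!("procedure {} state: {:?}", pid, state);
    Ok(())
}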
@@ -23,4 +23,5 @@ pub(crate) mod reconcile_table;
pub(crate) mod reconcile_logical_tables;
// TODO(weny): Remove it
#[allow(dead_code)]
pub(crate) mod reconcile_catalog;
pub(crate) mod utils;

198  src/common/meta/src/reconciliation/reconcile_catalog.rs  Normal file
@@ -0,0 +1,198 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;
use std::fmt::Debug;

use common_procedure::error::FromJsonSnafu;
use common_procedure::{
    Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, ProcedureId,
    Result as ProcedureResult, Status,
};
use futures::stream::BoxStream;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;

use crate::cache_invalidator::CacheInvalidatorRef;
use crate::error::Result;
use crate::key::TableMetadataManagerRef;
use crate::lock_key::CatalogLock;
use crate::node_manager::NodeManagerRef;
use crate::reconciliation::reconcile_catalog::start::ReconcileCatalogStart;
use crate::reconciliation::reconcile_database::utils::wait_for_inflight_subprocedures;
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
use crate::reconciliation::utils::Context;

pub(crate) mod end;
pub(crate) mod reconcile_databases;
pub(crate) mod start;

pub(crate) struct ReconcileCatalogContext {
    pub node_manager: NodeManagerRef,
    pub table_metadata_manager: TableMetadataManagerRef,
    pub cache_invalidator: CacheInvalidatorRef,
    persistent_ctx: PersistentContext,
    volatile_ctx: VolatileContext,
}

impl ReconcileCatalogContext {
    pub fn new(ctx: Context, persistent_ctx: PersistentContext) -> Self {
        Self {
            node_manager: ctx.node_manager,
            table_metadata_manager: ctx.table_metadata_manager,
            cache_invalidator: ctx.cache_invalidator,
            persistent_ctx,
            volatile_ctx: VolatileContext::default(),
        }
    }

    pub(crate) async fn wait_for_inflight_subprocedure(
        &mut self,
        procedure_ctx: &ProcedureContext,
    ) -> Result<()> {
        if let Some(procedure_id) = self.volatile_ctx.inflight_subprocedure {
            wait_for_inflight_subprocedures(
                procedure_ctx,
                &[procedure_id],
                self.persistent_ctx.fast_fail,
            )
            .await?;
        }
        Ok(())
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct PersistentContext {
    catalog: String,
    fast_fail: bool,
    resolve_strategy: ResolveStrategy,
}

impl PersistentContext {
    pub fn new(catalog: String, fast_fail: bool, resolve_strategy: ResolveStrategy) -> Self {
        Self {
            catalog,
            fast_fail,
            resolve_strategy,
        }
    }
}

#[derive(Default)]
pub(crate) struct VolatileContext {
    /// Stores the stream of schemas.
    schemas: Option<BoxStream<'static, Result<String>>>,
    /// Stores the inflight subprocedure.
    inflight_subprocedure: Option<ProcedureId>,
}

pub struct ReconcileCatalogProcedure {
    pub context: ReconcileCatalogContext,
    state: Box<dyn State>,
}

impl ReconcileCatalogProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::ReconcileCatalog";

    pub fn new(
        ctx: Context,
        catalog: String,
        fast_fail: bool,
        resolve_strategy: ResolveStrategy,
    ) -> Self {
        let persistent_ctx = PersistentContext::new(catalog, fast_fail, resolve_strategy);
        let context = ReconcileCatalogContext::new(ctx, persistent_ctx);
        let state = Box::new(ReconcileCatalogStart);
        Self { context, state }
    }

    pub(crate) fn from_json(ctx: Context, json: &str) -> ProcedureResult<Self> {
        let ProcedureDataOwned {
            state,
            persistent_ctx,
        } = serde_json::from_str(json).context(FromJsonSnafu)?;
        let context = ReconcileCatalogContext::new(ctx, persistent_ctx);
        Ok(Self { context, state })
    }
}

#[derive(Debug, Serialize)]
struct ProcedureData<'a> {
    state: &'a dyn State,
    persistent_ctx: &'a PersistentContext,
}

#[derive(Debug, Deserialize)]
struct ProcedureDataOwned {
    state: Box<dyn State>,
    persistent_ctx: PersistentContext,
}

#[async_trait::async_trait]
impl Procedure for ReconcileCatalogProcedure {
    fn type_name(&self) -> &str {
        Self::TYPE_NAME
    }

    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
        let state = &mut self.state;

        match state.next(&mut self.context, _ctx).await {
            Ok((next, status)) => {
                *state = next;
                Ok(status)
            }
            Err(e) => {
                if e.is_retry_later() {
                    Err(ProcedureError::retry_later(e))
                } else {
                    Err(ProcedureError::external(e))
                }
            }
        }
    }

    fn dump(&self) -> ProcedureResult<String> {
        let data = ProcedureData {
            state: self.state.as_ref(),
            persistent_ctx: &self.context.persistent_ctx,
        };
        serde_json::to_string(&data).context(FromJsonSnafu)
    }

    fn lock_key(&self) -> LockKey {
        let catalog = &self.context.persistent_ctx.catalog;

        LockKey::new(vec![CatalogLock::Write(catalog).into()])
    }
}

#[async_trait::async_trait]
#[typetag::serde(tag = "reconcile_catalog_state")]
pub(crate) trait State: Sync + Send + Debug {
    fn name(&self) -> &'static str {
        let type_name = std::any::type_name::<Self>();
        // short name
        type_name.split("::").last().unwrap_or(type_name)
    }

    async fn next(
        &mut self,
        ctx: &mut ReconcileCatalogContext,
        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)>;

    fn as_any(&self) -> &dyn Any;
}
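The procedure persists itself as JSON through `ProcedureData`/`ProcedureDataOwned`, with the typetag-serialized `State` carried alongside the persistent context. A rough sketch of the intended round-trip, assuming a reconciliation `Context` value is available and the `Procedure` trait is in scope for `dump`:

// Sketch: serialize a running procedure and restore it after a metasrv restart.
fn roundtrip(
    ctx: Context,
    procedure: &ReconcileCatalogProcedure,
) -> ProcedureResult<ReconcileCatalogProcedure> {
    // `dump` writes the current `state` (via typetag) plus the persistent context.
    let json = procedure.dump()?;
    // `from_json` rebuilds the boxed `State` and re-attaches the runtime context.
    ReconcileCatalogProcedure::from_json(ctx, &json)
}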
40  src/common/meta/src/reconciliation/reconcile_catalog/end.rs  Normal file
@@ -0,0 +1,40 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use serde::{Deserialize, Serialize};

use crate::error::Result;
use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ReconcileCatalogEnd;

#[async_trait::async_trait]
#[typetag::serde]
impl State for ReconcileCatalogEnd {
    async fn next(
        &mut self,
        _ctx: &mut ReconcileCatalogContext,
        _procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        Ok((Box::new(ReconcileCatalogEnd), Status::done()))
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}
@@ -0,0 +1,94 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;

use common_procedure::{Context as ProcedureContext, ProcedureWithId, Status};
use futures::TryStreamExt;
use serde::{Deserialize, Serialize};

use crate::error::Result;
use crate::reconciliation::reconcile_catalog::end::ReconcileCatalogEnd;
use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};
use crate::reconciliation::reconcile_database::{ReconcileDatabaseProcedure, DEFAULT_PARALLELISM};
use crate::reconciliation::utils::Context;

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ReconcileDatabases;

#[async_trait::async_trait]
#[typetag::serde]
impl State for ReconcileDatabases {
    async fn next(
        &mut self,
        ctx: &mut ReconcileCatalogContext,
        procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        // Waits for inflight subprocedure first.
        ctx.wait_for_inflight_subprocedure(procedure_ctx).await?;

        if ctx.volatile_ctx.schemas.as_deref().is_none() {
            let schemas = ctx
                .table_metadata_manager
                .schema_manager()
                .schema_names(&ctx.persistent_ctx.catalog);
            ctx.volatile_ctx.schemas = Some(schemas);
        }

        if let Some(catalog) = ctx
            .volatile_ctx
            .schemas
            .as_mut()
            .unwrap()
            .try_next()
            .await?
        {
            return Self::schedule_reconcile_database(ctx, catalog);
        }

        Ok((Box::new(ReconcileCatalogEnd), Status::executing(false)))
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}

impl ReconcileDatabases {
    fn schedule_reconcile_database(
        ctx: &mut ReconcileCatalogContext,
        schema: String,
    ) -> Result<(Box<dyn State>, Status)> {
        let context = Context {
            node_manager: ctx.node_manager.clone(),
            table_metadata_manager: ctx.table_metadata_manager.clone(),
            cache_invalidator: ctx.cache_invalidator.clone(),
        };
        let procedure = ReconcileDatabaseProcedure::new(
            context,
            ctx.persistent_ctx.catalog.clone(),
            schema,
            ctx.persistent_ctx.fast_fail,
            DEFAULT_PARALLELISM,
            ctx.persistent_ctx.resolve_strategy,
            true,
        );
        let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));

        Ok((
            Box::new(ReconcileDatabases),
            Status::suspended(vec![procedure_with_id], false),
        ))
    }
}
@@ -0,0 +1,58 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::Any;

use common_procedure::{Context as ProcedureContext, Status};
use serde::{Deserialize, Serialize};
use snafu::ensure;

use crate::error::{self, Result};
use crate::key::catalog_name::CatalogNameKey;
use crate::reconciliation::reconcile_catalog::reconcile_databases::ReconcileDatabases;
use crate::reconciliation::reconcile_catalog::{ReconcileCatalogContext, State};

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct ReconcileCatalogStart;

#[async_trait::async_trait]
#[typetag::serde]
impl State for ReconcileCatalogStart {
    async fn next(
        &mut self,
        ctx: &mut ReconcileCatalogContext,
        _procedure_ctx: &ProcedureContext,
    ) -> Result<(Box<dyn State>, Status)> {
        let exists = ctx
            .table_metadata_manager
            .catalog_manager()
            .exists(CatalogNameKey {
                catalog: &ctx.persistent_ctx.catalog,
            })
            .await?;

        ensure!(
            exists,
            error::CatalogNotFoundSnafu {
                catalog: &ctx.persistent_ctx.catalog
            },
        );

        Ok((Box::new(ReconcileDatabases), Status::executing(true)))
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}
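The catalog-level flow added in these files is ReconcileCatalogStart -> ReconcileDatabases -> ReconcileCatalogEnd, where each step returns the next boxed `State` plus a `Status` that tells the procedure framework whether to persist, suspend on subprocedures, or finish. A hedged sketch of driving one transition manually, assuming `Status` implements `Debug`:

// Sketch: a single step of the catalog reconciliation state machine. Starting from
// ReconcileCatalogStart, a successful `next` returns the ReconcileDatabases state
// together with Status::executing(true) (persist the procedure before continuing).
async fn step_once(
    ctx: &mut ReconcileCatalogContext,
    procedure_ctx: &ProcedureContext,
) -> Result<()> {
    let mut state: Box<dyn State> = Box::new(ReconcileCatalogStart);
    let (next, status) = state.next(ctx, procedure_ctx).await?;
    common_telemetry::info!("transitioned to {}, status: {:?}", next.name(), status);
    Ok(())
}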
@@ -45,6 +45,8 @@ use crate::reconciliation::reconcile_database::utils::wait_for_inflight_subproce
use crate::reconciliation::reconcile_table::resolve_column_metadata::ResolveStrategy;
use crate::reconciliation::utils::Context;

pub(crate) const DEFAULT_PARALLELISM: usize = 64;

pub(crate) struct ReconcileDatabaseContext {
    pub node_manager: NodeManagerRef,
    pub table_metadata_manager: TableMetadataManagerRef,
@@ -89,6 +91,7 @@ pub(crate) struct PersistentContext {
    fail_fast: bool,
    parallelism: usize,
    resolve_strategy: ResolveStrategy,
    is_subprocedure: bool,
}

impl PersistentContext {
@@ -98,6 +101,7 @@ impl PersistentContext {
        fail_fast: bool,
        parallelism: usize,
        resolve_strategy: ResolveStrategy,
        is_subprocedure: bool,
    ) -> Self {
        Self {
            catalog,
@@ -105,6 +109,7 @@ impl PersistentContext {
            fail_fast,
            parallelism,
            resolve_strategy,
            is_subprocedure,
        }
    }
}
@@ -139,9 +144,16 @@ impl ReconcileDatabaseProcedure {
        fail_fast: bool,
        parallelism: usize,
        resolve_strategy: ResolveStrategy,
        is_subprocedure: bool,
    ) -> Self {
        let persistent_ctx =
            PersistentContext::new(catalog, schema, fail_fast, parallelism, resolve_strategy);
        let persistent_ctx = PersistentContext::new(
            catalog,
            schema,
            fail_fast,
            parallelism,
            resolve_strategy,
            is_subprocedure,
        );
        let context = ReconcileDatabaseContext::new(ctx, persistent_ctx);
        let state = Box::new(ReconcileDatabaseStart);
        Self { context, state }
@@ -204,6 +216,10 @@ impl Procedure for ReconcileDatabaseProcedure {
    fn lock_key(&self) -> LockKey {
        let catalog = &self.context.persistent_ctx.catalog;
        let schema = &self.context.persistent_ctx.schema;
        // If the procedure is a subprocedure, only lock the schema.
        if self.context.persistent_ctx.is_subprocedure {
            return LockKey::new(vec![SchemaLock::write(catalog, schema).into()]);
        }

        LockKey::new(vec![
            CatalogLock::Read(catalog).into(),

@@ -28,6 +28,7 @@ use crate::error::{Result, TableInfoNotFoundSnafu};
use crate::key::table_route::TableRouteValue;
use crate::reconciliation::reconcile_database::end::ReconcileDatabaseEnd;
use crate::reconciliation::reconcile_database::{ReconcileDatabaseContext, State};
use crate::reconciliation::reconcile_logical_tables::ReconcileLogicalTablesProcedure;
use crate::reconciliation::utils::Context;

#[derive(Debug, Serialize, Deserialize)]
@@ -201,7 +202,7 @@ impl ReconcileLogicalTables {
    async fn build_reconcile_logical_tables_procedure(
        ctx: &Context,
        physical_table_id: TableId,
        _logical_tables: Vec<(TableId, TableName)>,
        logical_tables: Vec<(TableId, TableName)>,
    ) -> Result<ProcedureWithId> {
        let table_info = ctx
            .table_metadata_manager
@@ -212,8 +213,16 @@ impl ReconcileLogicalTables {
            table: format!("table_id: {}", physical_table_id),
        })?;

        let _physical_table_name = table_info.table_name();
        todo!()
        let physical_table_name = table_info.table_name();
        let procedure = ReconcileLogicalTablesProcedure::new(
            ctx.clone(),
            physical_table_id,
            physical_table_name,
            logical_tables,
            true,
        );

        Ok(ProcedureWithId::with_random_id(Box::new(procedure)))
    }

    fn enqueue_logical_table(
@@ -22,14 +22,12 @@ use snafu::{ensure, OptionExt};
use store_api::metadata::{ColumnMetadata, RegionMetadata};
use store_api::storage::{RegionId, TableId};
use table::metadata::{RawTableInfo, RawTableMeta};
use table::table_name::TableName;
use table::table_reference::TableReference;

use crate::cache_invalidator::CacheInvalidatorRef;
use crate::error::{
    self, MismatchColumnIdSnafu, MissingColumnInColumnMetadataSnafu, Result, UnexpectedSnafu,
    MismatchColumnIdSnafu, MissingColumnInColumnMetadataSnafu, Result, UnexpectedSnafu,
};
use crate::key::table_name::{TableNameKey, TableNameManager};
use crate::key::TableMetadataManagerRef;
use crate::node_manager::NodeManagerRef;

@@ -397,87 +395,6 @@ pub(crate) fn build_table_meta_from_column_metadatas(
    Ok(new_raw_table_meta)
}

/// Validates the table id and name consistency.
///
/// It will check the table id and table name consistency.
/// If the table id and table name are not consistent, it will return an error.
pub(crate) async fn validate_table_id_and_name(
    table_name_manager: &TableNameManager,
    table_id: TableId,
    table_name: &TableName,
) -> Result<()> {
    let table_name_key = TableNameKey::new(
        &table_name.catalog_name,
        &table_name.schema_name,
        &table_name.table_name,
    );
    let table_name_value = table_name_manager
        .get(table_name_key)
        .await?
        .with_context(|| error::TableNotFoundSnafu {
            table_name: table_name.to_string(),
        })?;

    ensure!(
        table_name_value.table_id() == table_id,
        error::UnexpectedSnafu {
            err_msg: format!(
                "The table id mismatch for table: {}, expected {}, actual {}",
                table_name,
                table_id,
                table_name_value.table_id()
            ),
        }
    );

    Ok(())
}

/// Checks whether the column metadata invariants hold for the logical table.
///
/// Invariants:
/// - Primary key (Tag) columns must exist in the new metadata.
/// - Timestamp column must remain exactly the same in name and ID.
///
/// TODO(weny): add tests
pub(crate) fn check_column_metadatas_invariants_for_logical_table(
    column_metadatas: &[ColumnMetadata],
    table_info: &RawTableInfo,
) -> bool {
    let new_primary_keys = column_metadatas
        .iter()
        .filter(|c| c.semantic_type == SemanticType::Tag)
        .map(|c| c.column_schema.name.as_str())
        .collect::<HashSet<_>>();

    let old_primary_keys = table_info
        .meta
        .primary_key_indices
        .iter()
        .map(|i| table_info.meta.schema.column_schemas[*i].name.as_str());

    for name in old_primary_keys {
        if !new_primary_keys.contains(name) {
            return false;
        }
    }

    let old_timestamp_column_name = table_info
        .meta
        .schema
        .column_schemas
        .iter()
        .find(|c| c.is_time_index())
        .map(|c| c.name.as_str());

    let new_timestamp_column_name = column_metadatas
        .iter()
        .find(|c| c.semantic_type == SemanticType::Timestamp)
        .map(|c| c.column_schema.name.as_str());

    old_timestamp_column_name != new_timestamp_column_name
}

/// Returns true if the logical table info needs to be updated.
///
/// The logical table only supports adding columns, so we can check the length of column metadatas
@@ -13,3 +13,4 @@
// limitations under the License.

pub mod datanode;
pub mod memory;

33  src/common/options/src/memory.rs  Normal file
@@ -0,0 +1,33 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct MemoryOptions {
    /// Whether to enable heap profiling activation.
    /// When enabled, heap profiling will be activated if the `MALLOC_CONF` environment variable
    /// is set to "prof:true,prof_active:false". The official image adds this env variable.
    /// Default is true.
    pub enable_heap_profiling: bool,
}

impl Default for MemoryOptions {
    fn default() -> Self {
        Self {
            enable_heap_profiling: true,
        }
    }
}
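Because `MemoryOptions` carries `#[serde(default)]`, an absent or partial `memory` section falls back to the defaults. A hedged test-style sketch of that behavior, assuming the `toml` crate is available and that the section name matches the `memory` field added to the option structs below:

// Sketch: parsing MemoryOptions out of a TOML config fragment.
#[derive(serde::Deserialize, Default)]
struct PartialConfig {
    #[serde(default)]
    memory: MemoryOptions,
}

fn parse_example() {
    // Explicitly disable heap profiling activation.
    let cfg: PartialConfig = toml::from_str("[memory]\nenable_heap_profiling = false\n").unwrap();
    assert!(!cfg.memory.enable_heap_profiling);

    // Missing section: defaults apply (enable_heap_profiling = true).
    let cfg: PartialConfig = toml::from_str("").unwrap();
    assert!(cfg.memory.enable_heap_profiling);
}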
@@ -26,6 +26,7 @@ common-greptimedb-telemetry.workspace = true
common-grpc.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-options.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true

@@ -16,6 +16,7 @@

use common_base::readable_size::ReadableSize;
use common_config::{Configurable, DEFAULT_DATA_HOME};
use common_options::memory::MemoryOptions;
pub use common_procedure::options::ProcedureConfig;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_wal::config::DatanodeWalConfig;
@@ -85,6 +86,7 @@ pub struct DatanodeOptions {
    pub export_metrics: ExportMetricsOption,
    pub tracing: TracingOptions,
    pub query: QueryOptions,
    pub memory: MemoryOptions,

    /// Deprecated options, please use the new options instead.
    #[deprecated(note = "Please use `grpc.addr` instead.")]
@@ -131,6 +133,7 @@ impl Default for DatanodeOptions {
            export_metrics: ExportMetricsOption::default(),
            tracing: TracingOptions::default(),
            query: QueryOptions::default(),
            memory: MemoryOptions::default(),

            // Deprecated options
            rpc_addr: None,

@@ -28,6 +28,7 @@ common-function.workspace = true
common-grpc.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-options.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true

@@ -24,6 +24,7 @@ use api::v1::{RowDeleteRequest, RowDeleteRequests, RowInsertRequest, RowInsertRe
use common_config::Configurable;
use common_error::ext::BoxedError;
use common_meta::key::TableMetadataManagerRef;
use common_options::memory::MemoryOptions;
use common_runtime::JoinHandle;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_telemetry::{debug, info, trace};
@@ -111,6 +112,7 @@ pub struct FlownodeOptions {
    pub heartbeat: HeartbeatOptions,
    pub query: QueryOptions,
    pub user_provider: Option<String>,
    pub memory: MemoryOptions,
}

impl Default for FlownodeOptions {
@@ -131,6 +133,7 @@ impl Default for FlownodeOptions {
                allow_query_fallback: false,
            },
            user_provider: None,
            memory: MemoryOptions::default(),
        }
    }
}
@@ -24,11 +24,11 @@ use catalog::CatalogManagerRef;
use common_base::Plugins;
use common_error::ext::BoxedError;
use common_meta::cache::{LayeredCacheRegistryRef, TableFlownodeSetCacheRef, TableRouteCacheRef};
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::{Flownode, NodeManagerRef};
use common_meta::procedure_executor::ProcedureExecutorRef;
use common_query::Output;
use common_runtime::JoinHandle;
use common_telemetry::tracing::info;

@@ -17,6 +17,7 @@ use std::sync::Arc;

use common_base::readable_size::ReadableSize;
use common_config::config::Configurable;
use common_options::datanode::DatanodeClientOptions;
use common_options::memory::MemoryOptions;
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions};
use meta_client::MetaClientOptions;
use query::options::QueryOptions;
@@ -62,6 +63,7 @@ pub struct FrontendOptions {
    pub query: QueryOptions,
    pub max_in_flight_write_bytes: Option<ReadableSize>,
    pub slow_query: Option<SlowQueryOptions>,
    pub memory: MemoryOptions,
}

impl Default for FrontendOptions {
@@ -88,6 +90,7 @@ impl Default for FrontendOptions {
            query: QueryOptions::default(),
            max_in_flight_write_bytes: None,
            slow_query: Some(SlowQueryOptions::default()),
            memory: MemoryOptions::default(),
        }
    }
}

@@ -40,12 +40,12 @@ use common_base::Plugins;
use common_config::KvBackendConfig;
use common_error::ext::{BoxedError, ErrorExt};
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::key::runtime_switch::RuntimeSwitchManager;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::NodeManagerRef;
use common_meta::procedure_executor::ProcedureExecutorRef;
use common_meta::state_store::KvStateStore;
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::options::ProcedureConfig;

@@ -20,11 +20,11 @@ use catalog::CatalogManagerRef;
use common_base::Plugins;
use common_meta::cache::{LayeredCacheRegistryRef, TableRouteCacheRef};
use common_meta::cache_invalidator::{CacheInvalidatorRef, DummyCacheInvalidator};
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::NodeManagerRef;
use common_meta::procedure_executor::ProcedureExecutorRef;
use dashmap::DashMap;
use operator::delete::Deleter;
use operator::flow::FlowServiceOperator;
@@ -158,7 +158,8 @@ impl FrontendBuilder {
            self.catalog_manager.clone(),
        ));

        let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
        let flow_metadata_manager: Arc<FlowMetadataManager> =
            Arc::new(FlowMetadataManager::new(kv_backend.clone()));
        let flow_service = FlowServiceOperator::new(flow_metadata_manager, node_manager.clone());

        let query_engine = QueryEngineFactory::new_with_plugins(

@@ -72,7 +72,7 @@ impl OpenTelemetryProtocolHandler for Instance {
            .unwrap_or_default();
        metric_ctx.is_legacy = is_legacy;

        let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request, &metric_ctx)?;
        let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request, &mut metric_ctx)?;
        OTLP_METRICS_ROWS.inc_by(rows as u64);

        let ctx = if !is_legacy {
@@ -22,7 +22,7 @@ fst.workspace = true
futures.workspace = true
greptime-proto.workspace = true
itertools.workspace = true
jieba-rs = "0.7"
jieba-rs = "0.8"
lazy_static.workspace = true
mockall.workspace = true
pin-project.workspace = true
@@ -34,8 +34,8 @@ roaring = "0.10"
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true
tantivy = { version = "0.22", features = ["zstd-compression"] }
tantivy-jieba = "0.11.0"
tantivy = { version = "0.24", features = ["zstd-compression"] }
tantivy-jieba = "0.16"
tokio.workspace = true
tokio-util.workspace = true
uuid.workspace = true

@@ -34,12 +34,12 @@ use common_meta::cluster::{
    ClusterInfo, MetasrvStatus, NodeInfo, NodeInfoKey, NodeStatus, Role as ClusterRole,
};
use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue, RegionStat};
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
use common_meta::error::{
    self as meta_error, ExternalSnafu, Result as MetaResult, UnsupportedSnafu,
};
use common_meta::key::flow::flow_state::{FlowStat, FlowStateManager};
use common_meta::kv_backend::KvBackendRef;
use common_meta::procedure_executor::{ExecutorContext, ProcedureExecutor};
use common_meta::range_stream::PaginationStream;
use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use common_meta::rpc::procedure::{
@@ -21,11 +21,16 @@ use api::v1::meta::procedure_service_server::ProcedureServiceServer;
use api::v1::meta::store_server::StoreServer;
use common_base::Plugins;
use common_config::Configurable;
use common_error::ext::BoxedError;
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
use common_meta::distributed_time_constants::META_LEASE_SECS;
use common_meta::kv_backend::chroot::ChrootKvBackend;
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::memory::MemoryKvBackend;
#[cfg(feature = "pg_kvbackend")]
use common_meta::kv_backend::rds::postgres::create_postgres_tls_connector;
#[cfg(feature = "pg_kvbackend")]
use common_meta::kv_backend::rds::postgres::{TlsMode as PgTlsMode, TlsOption as PgTlsOption};
#[cfg(feature = "mysql_kvbackend")]
use common_meta::kv_backend::rds::MySqlStore;
#[cfg(feature = "pg_kvbackend")]
@@ -41,6 +46,7 @@ use servers::export_metrics::ExportMetricsTask;
use servers::http::{HttpServer, HttpServerBuilder};
use servers::metrics_handler::MetricsHandler;
use servers::server::Server;
use servers::tls::TlsOption;
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
use snafu::OptionExt;
use snafu::ResultExt;
@@ -310,7 +316,8 @@ pub async fn metasrv_builder(
            cfg.keepalives = Some(true);
            cfg.keepalives_idle = Some(Duration::from_secs(POSTGRES_KEEP_ALIVE_SECS));
            // We use a separate pool for election since we need a different session keep-alive idle time.
            let pool = create_postgres_pool_with(&opts.store_addrs, cfg).await?;
            let pool =
                create_postgres_pool_with(&opts.store_addrs, cfg, opts.backend_tls.clone()).await?;

            let election_client = ElectionPgClient::new(
                pool,
@@ -329,7 +336,7 @@ pub async fn metasrv_builder(
            )
            .await?;

            let pool = create_postgres_pool(&opts.store_addrs).await?;
            let pool = create_postgres_pool(&opts.store_addrs, opts.backend_tls.clone()).await?;
            let kv_backend = PgStore::with_pg_pool(pool, &opts.meta_table_name, opts.max_txn_ops)
                .await
                .context(error::KvBackendSnafu)?;
@@ -440,28 +447,64 @@ pub async fn create_etcd_client(store_addrs: &[String]) -> Result<Client> {
}

#[cfg(feature = "pg_kvbackend")]
/// Creates a pool for the Postgres backend.
///
/// It only uses the first store addr to create a pool.
pub async fn create_postgres_pool(store_addrs: &[String]) -> Result<deadpool_postgres::Pool> {
    create_postgres_pool_with(store_addrs, Config::new()).await
/// Converts servers::tls::TlsOption to postgres::TlsOption to avoid circular dependencies
fn convert_tls_option(tls_option: &TlsOption) -> PgTlsOption {
    let mode = match tls_option.mode {
        servers::tls::TlsMode::Disable => PgTlsMode::Disable,
        servers::tls::TlsMode::Prefer => PgTlsMode::Prefer,
        servers::tls::TlsMode::Require => PgTlsMode::Require,
        servers::tls::TlsMode::VerifyCa => PgTlsMode::VerifyCa,
        servers::tls::TlsMode::VerifyFull => PgTlsMode::VerifyFull,
    };

    PgTlsOption {
        mode,
        cert_path: tls_option.cert_path.clone(),
        key_path: tls_option.key_path.clone(),
        ca_cert_path: tls_option.ca_cert_path.clone(),
        watch: tls_option.watch,
    }
}

#[cfg(feature = "pg_kvbackend")]
/// Creates a pool for the Postgres backend.
/// Creates a pool for the Postgres backend with optional TLS.
///
/// It only uses the first store addr to create a pool.
pub async fn create_postgres_pool(
    store_addrs: &[String],
    tls_config: Option<TlsOption>,
) -> Result<deadpool_postgres::Pool> {
    create_postgres_pool_with(store_addrs, Config::new(), tls_config).await
}

#[cfg(feature = "pg_kvbackend")]
/// Creates a pool for the Postgres backend with config and optional TLS.
///
/// It only uses the first store addr, together with the given config, to create a pool.
pub async fn create_postgres_pool_with(
    store_addrs: &[String],
    mut cfg: Config,
    tls_config: Option<TlsOption>,
) -> Result<deadpool_postgres::Pool> {
    let postgres_url = store_addrs.first().context(error::InvalidArgumentsSnafu {
        err_msg: "empty store addrs",
    })?;
    cfg.url = Some(postgres_url.to_string());
    let pool = cfg
        .create_pool(Some(Runtime::Tokio1), NoTls)
        .context(error::CreatePostgresPoolSnafu)?;

    let pool = if let Some(tls_config) = tls_config {
        let pg_tls_config = convert_tls_option(&tls_config);
        let tls_connector =
            create_postgres_tls_connector(&pg_tls_config).map_err(|e| error::Error::Other {
                source: BoxedError::new(e),
                location: snafu::Location::new(file!(), line!(), 0),
            })?;
        cfg.create_pool(Some(Runtime::Tokio1), tls_connector)
            .context(error::CreatePostgresPoolSnafu)?
    } else {
        cfg.create_pool(Some(Runtime::Tokio1), NoTls)
            .context(error::CreatePostgresPoolSnafu)?
    };

    Ok(pool)
}
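A short sketch of how the metasrv bootstrap above might obtain a TLS-enabled pool when `backend_tls` is configured; the store address is a placeholder, not a value from this change.

// Sketch: pass the configured backend TLS option straight through to the pool helper.
async fn pool_with_tls(backend_tls: Option<TlsOption>) -> Result<deadpool_postgres::Pool> {
    let store_addrs = vec!["postgres://user:pass@127.0.0.1:5432/metadata".to_string()];
    create_postgres_pool(&store_addrs, backend_tls).await
}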
@@ -819,7 +819,7 @@ mod tests {
            }
            .fail();
        }
        let pool = create_postgres_pool(&[endpoint]).await.unwrap();
        let pool = create_postgres_pool(&[endpoint], None).await.unwrap();
        let mut pg_client = ElectionPgClient::new(
            pool,
            execution_timeout,

@@ -25,7 +25,7 @@ use common_base::Plugins;
use common_config::{Configurable, DEFAULT_DATA_HOME};
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::ddl_manager::DdlManagerRef;
use common_meta::distributed_time_constants;
use common_meta::key::runtime_switch::RuntimeSwitchManagerRef;
use common_meta::key::TableMetadataManagerRef;
@@ -40,6 +40,7 @@ use common_meta::region_registry::LeaderRegionRegistryRef;
use common_meta::sequence::SequenceRef;
use common_meta::wal_options_allocator::WalOptionsAllocatorRef;
use common_options::datanode::DatanodeClientOptions;
use common_options::memory::MemoryOptions;
use common_procedure::options::ProcedureConfig;
use common_procedure::ProcedureManagerRef;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
@@ -49,6 +50,7 @@ use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
use servers::grpc::GrpcOptions;
use servers::http::HttpOptions;
use servers::tls::TlsOption;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use table::metadata::TableId;
@@ -105,6 +107,10 @@ pub struct MetasrvOptions {
    pub server_addr: String,
    /// The address of the store, e.g., etcd.
    pub store_addrs: Vec<String>,
    /// TLS configuration for kv store backend (PostgreSQL/MySQL)
    /// Only applicable when using PostgreSQL or MySQL as the metadata store
    #[serde(default)]
    pub backend_tls: Option<TlsOption>,
    /// The type of selector.
    pub selector: SelectorType,
    /// Whether to use the memory store.
@@ -160,6 +166,8 @@ pub struct MetasrvOptions {
    pub flush_stats_factor: usize,
    /// The tracing options.
    pub tracing: TracingOptions,
    /// The memory options.
    pub memory: MemoryOptions,
    /// The datastore for kv metadata.
    pub backend: BackendImpl,
    #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
@@ -177,6 +185,7 @@ impl fmt::Debug for MetasrvOptions {
        let mut debug_struct = f.debug_struct("MetasrvOptions");
        debug_struct
            .field("store_addrs", &self.sanitize_store_addrs())
            .field("backend_tls", &self.backend_tls)
            .field("selector", &self.selector)
            .field("use_memory_store", &self.use_memory_store)
            .field("enable_region_failover", &self.enable_region_failover)
@@ -222,6 +231,7 @@ impl Default for MetasrvOptions {
            #[allow(deprecated)]
            server_addr: String::new(),
            store_addrs: vec!["127.0.0.1:2379".to_string()],
            backend_tls: None,
            selector: SelectorType::default(),
            use_memory_store: false,
            enable_region_failover: false,
@@ -251,6 +261,7 @@ impl Default for MetasrvOptions {
            max_txn_ops: 128,
            flush_stats_factor: 3,
            tracing: TracingOptions::default(),
            memory: MemoryOptions::default(),
            backend: BackendImpl::EtcdStore,
            #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
            meta_table_name: common_meta::kv_backend::DEFAULT_META_TABLE_NAME.to_string(),
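A hedged sketch of enabling TLS for a PostgreSQL metadata backend in code. The field names come from the `MetasrvOptions` and `convert_tls_option` definitions in this diff; it assumes `servers::tls::TlsOption` implements `Default`, and the paths and store address are placeholders (the `backend` field is omitted because the exact `BackendImpl` variant name is not shown here).

// Sketch: metasrv options pointing at a Postgres metadata store with CA verification.
fn metasrv_with_pg_tls() -> MetasrvOptions {
    MetasrvOptions {
        store_addrs: vec!["postgres://user:pass@127.0.0.1:5432/metadata".to_string()],
        backend_tls: Some(TlsOption {
            mode: servers::tls::TlsMode::VerifyCa,
            ca_cert_path: "/etc/greptimedb/pg-ca.crt".to_string(), // placeholder path
            ..Default::default()
        }),
        ..Default::default()
    }
}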
@@ -432,7 +443,7 @@ pub struct Metasrv {
    election: Option<ElectionRef>,
    procedure_manager: ProcedureManagerRef,
    mailbox: MailboxRef,
    procedure_executor: ProcedureExecutorRef,
    ddl_manager: DdlManagerRef,
    wal_options_allocator: WalOptionsAllocatorRef,
    table_metadata_manager: TableMetadataManagerRef,
    runtime_switch_manager: RuntimeSwitchManagerRef,
@@ -683,8 +694,8 @@ impl Metasrv {
        &self.mailbox
    }

    pub fn procedure_executor(&self) -> &ProcedureExecutorRef {
        &self.procedure_executor
    pub fn ddl_manager(&self) -> &DdlManagerRef {
        &self.ddl_manager
    }

    pub fn procedure_manager(&self) -> &ProcedureManagerRef {

@@ -464,7 +464,7 @@ impl MetasrvBuilder {
            election,
            procedure_manager,
            mailbox,
            procedure_executor: ddl_manager,
            ddl_manager,
            wal_options_allocator,
            table_metadata_manager,
            runtime_switch_manager,

@@ -21,7 +21,7 @@ use api::v1::meta::{
    ProcedureDetailRequest, ProcedureDetailResponse, ProcedureStateResponse, QueryProcedureRequest,
    ResponseHeader,
};
use common_meta::ddl::ExecutorContext;
use common_meta::procedure_executor::ExecutorContext;
use common_meta::rpc::ddl::{DdlTask, SubmitDdlTaskRequest};
use common_meta::rpc::procedure;
use common_telemetry::warn;
@@ -100,7 +100,7 @@ impl procedure_service_server::ProcedureService for Metasrv {
            .context(error::ConvertProtoDataSnafu)?;

        let resp = self
            .procedure_executor()
            .ddl_manager()
            .submit_ddl_task(
                &ExecutorContext {
                    tracing_context: Some(header.tracing_context),

@@ -21,10 +21,12 @@ use datafusion_common::Column;
use datafusion_expr::{lit, Expr};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use mito2::memtable::bulk::part::BulkPartConverter;
use mito2::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtable};
use mito2::memtable::time_series::TimeSeriesMemtable;
use mito2::memtable::{KeyValues, Memtable};
use mito2::region::options::MergeMode;
use mito2::sst::{to_flat_sst_arrow_schema, FlatSchemaOptions};
use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
use mito_codec::row_converter::DensePrimaryKeyCodec;
use rand::rngs::ThreadRng;
@@ -38,7 +40,7 @@ use table::predicate::Predicate;

/// Writes rows.
fn write_rows(c: &mut Criterion) {
    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
    let metadata = Arc::new(memtable_util::metadata_with_primary_key(vec![1, 0], true));
    let timestamps = (0..100).collect::<Vec<_>>();

    // Note that this test only generates one time series.
@@ -359,5 +361,71 @@ fn cpu_metadata() -> RegionMetadata {
    builder.build().unwrap()
}

criterion_group!(benches, write_rows, full_scan, filter_1_host);
fn bulk_part_converter(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let start_sec = 1710043200;

    let mut group = c.benchmark_group("bulk_part_converter");

    for &rows in &[1024, 2048, 4096, 8192] {
        // Benchmark without storing primary key columns (baseline)
        group.bench_with_input(format!("{}_rows_no_pk_columns", rows), &rows, |b, &rows| {
            b.iter(|| {
                let generator =
                    CpuDataGenerator::new(metadata.clone(), rows, start_sec, start_sec + 1);
                let codec = Arc::new(DensePrimaryKeyCodec::new(&metadata));
                let schema = to_flat_sst_arrow_schema(
                    &metadata,
                    &FlatSchemaOptions {
                        raw_pk_columns: false,
                        string_pk_use_dict: false,
                    },
                );
                let mut converter = BulkPartConverter::new(&metadata, schema, rows, codec, false);

                if let Some(kvs) = generator.iter().next() {
                    converter.append_key_values(&kvs).unwrap();
                }

                let _bulk_part = converter.convert().unwrap();
            });
        });

        // Benchmark with storing primary key columns
        group.bench_with_input(
            format!("{}_rows_with_pk_columns", rows),
            &rows,
            |b, &rows| {
                b.iter(|| {
                    let generator =
                        CpuDataGenerator::new(metadata.clone(), rows, start_sec, start_sec + 1);
                    let codec = Arc::new(DensePrimaryKeyCodec::new(&metadata));
                    let schema = to_flat_sst_arrow_schema(
                        &metadata,
                        &FlatSchemaOptions {
                            raw_pk_columns: true,
                            string_pk_use_dict: true,
                        },
                    );
                    let mut converter =
                        BulkPartConverter::new(&metadata, schema, rows, codec, true);

                    if let Some(kvs) = generator.iter().next() {
                        converter.append_key_values(&kvs).unwrap();
                    }

                    let _bulk_part = converter.convert().unwrap();
                });
            },
        );
    }
}

criterion_group!(
    benches,
    write_rows,
    full_scan,
    filter_1_host,
    bulk_part_converter,
);
criterion_main!(benches);
@@ -13,6 +13,7 @@
// limitations under the License.

use std::sync::Arc;
use std::time::Duration;

use object_store::services::Fs;
use object_store::util::{join_dir, with_instrument_layers};
@@ -28,6 +29,7 @@ use crate::cache::write_cache::SstUploadRequest;
use crate::cache::CacheManagerRef;
use crate::config::{BloomFilterConfig, FulltextIndexConfig, InvertedIndexConfig};
use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu, Result};
use crate::metrics::{COMPACTION_STAGE_ELAPSED, FLUSH_ELAPSED};
use crate::read::Source;
use crate::region::options::IndexOptions;
use crate::sst::file::{FileHandle, FileId, FileMeta, RegionFileId};
@@ -43,6 +45,87 @@ pub type AccessLayerRef = Arc<AccessLayer>;
/// SST write results.
pub type SstInfoArray = SmallVec<[SstInfo; 2]>;

/// Write operation type.
#[derive(Eq, PartialEq, Debug)]
pub enum WriteType {
    /// Writes from flush
    Flush,
    /// Writes from compaction.
    Compaction,
}

#[derive(Debug)]
pub struct Metrics {
    pub(crate) write_type: WriteType,
    pub(crate) iter_source: Duration,
    pub(crate) write_batch: Duration,
    pub(crate) update_index: Duration,
    pub(crate) upload_parquet: Duration,
    pub(crate) upload_puffin: Duration,
}

impl Metrics {
    pub(crate) fn new(write_type: WriteType) -> Self {
        Self {
            write_type,
            iter_source: Default::default(),
            write_batch: Default::default(),
            update_index: Default::default(),
            upload_parquet: Default::default(),
            upload_puffin: Default::default(),
        }
    }

    pub(crate) fn merge(mut self, other: Self) -> Self {
        assert_eq!(self.write_type, other.write_type);
        self.iter_source += other.iter_source;
        self.write_batch += other.write_batch;
        self.update_index += other.update_index;
        self.upload_parquet += other.upload_parquet;
        self.upload_puffin += other.upload_puffin;
        self
    }

    pub(crate) fn observe(self) {
        match self.write_type {
            WriteType::Flush => {
                FLUSH_ELAPSED
                    .with_label_values(&["iter_source"])
                    .observe(self.iter_source.as_secs_f64());
                FLUSH_ELAPSED
                    .with_label_values(&["write_batch"])
                    .observe(self.write_batch.as_secs_f64());
                FLUSH_ELAPSED
                    .with_label_values(&["update_index"])
                    .observe(self.update_index.as_secs_f64());
                FLUSH_ELAPSED
                    .with_label_values(&["upload_parquet"])
                    .observe(self.upload_parquet.as_secs_f64());
                FLUSH_ELAPSED
                    .with_label_values(&["upload_puffin"])
                    .observe(self.upload_puffin.as_secs_f64());
            }
            WriteType::Compaction => {
                COMPACTION_STAGE_ELAPSED
                    .with_label_values(&["iter_source"])
                    .observe(self.iter_source.as_secs_f64());
                COMPACTION_STAGE_ELAPSED
                    .with_label_values(&["write_batch"])
                    .observe(self.write_batch.as_secs_f64());
                COMPACTION_STAGE_ELAPSED
                    .with_label_values(&["update_index"])
                    .observe(self.update_index.as_secs_f64());
                COMPACTION_STAGE_ELAPSED
                    .with_label_values(&["upload_parquet"])
                    .observe(self.upload_parquet.as_secs_f64());
                COMPACTION_STAGE_ELAPSED
                    .with_label_values(&["upload_puffin"])
                    .observe(self.upload_puffin.as_secs_f64());
            }
        };
    }
}

/// A layer to access SST files under the same directory.
pub struct AccessLayer {
    table_dir: String,
@@ -145,11 +228,12 @@ impl AccessLayer {
        &self,
        request: SstWriteRequest,
        write_opts: &WriteOptions,
    ) -> Result<SstInfoArray> {
        write_type: WriteType,
    ) -> Result<(SstInfoArray, Metrics)> {
        let region_id = request.metadata.region_id;
        let cache_manager = request.cache_manager.clone();

        let sst_info = if let Some(write_cache) = cache_manager.write_cache() {
        let (sst_info, metrics) = if let Some(write_cache) = cache_manager.write_cache() {
            // Write to the write cache.
            write_cache
                .write_and_upload_sst(
@@ -162,6 +246,7 @@ impl AccessLayer {
                        remote_store: self.object_store.clone(),
                    },
                    write_opts,
                    write_type,
                )
                .await?
        } else {
@@ -190,12 +275,15 @@ impl AccessLayer {
                request.metadata,
                indexer_builder,
                path_provider,
                Metrics::new(write_type),
            )
            .await
            .with_file_cleaner(cleaner);
            writer
            let ssts = writer
                .write_all(request.source, request.max_sequence, write_opts)
                .await?
                .await?;
            let metrics = writer.into_metrics();
            (ssts, metrics)
        };

        // Put parquet metadata to cache manager.
@@ -210,7 +298,7 @@ impl AccessLayer {
            }
        }

        Ok(sst_info)
        Ok((sst_info, metrics))
    }
}
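To make the intent of the new `Metrics` type concrete: each SST writer accumulates its own `Metrics` tagged with a `WriteType`, sibling results of the same kind can be merged, and the total is reported to the `FLUSH_ELAPSED` or `COMPACTION_STAGE_ELAPSED` histograms by `observe`. A minimal in-crate sketch of that flow (`merge` asserts the write types match):

// Sketch: combine per-writer metrics from one flush and report them once.
fn report(flush_parts: Vec<Metrics>) {
    let merged = flush_parts
        .into_iter()
        .reduce(|acc, m| acc.merge(m)); // panics if write types differ
    if let Some(metrics) = merged {
        metrics.observe();
    }
}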
58
src/mito2/src/cache/write_cache.rs
vendored
58
src/mito2/src/cache/write_cache.rs
vendored
@@ -15,7 +15,7 @@
|
||||
//! A write-through cache for remote object stores.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_telemetry::{debug, info};
|
||||
@@ -25,14 +25,13 @@ use snafu::ResultExt;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::access_layer::{
|
||||
new_fs_cache_store, FilePathProvider, RegionFilePathFactory, SstInfoArray, SstWriteRequest,
|
||||
TempFileCleaner, WriteCachePathProvider,
|
||||
new_fs_cache_store, FilePathProvider, Metrics, RegionFilePathFactory, SstInfoArray,
|
||||
SstWriteRequest, TempFileCleaner, WriteCachePathProvider, WriteType,
|
||||
};
|
||||
use crate::cache::file_cache::{FileCache, FileCacheRef, FileType, IndexKey, IndexValue};
|
||||
use crate::error::{self, Result};
|
||||
use crate::metrics::{
|
||||
FLUSH_ELAPSED, UPLOAD_BYTES_TOTAL, WRITE_CACHE_DOWNLOAD_BYTES_TOTAL,
|
||||
WRITE_CACHE_DOWNLOAD_ELAPSED,
|
||||
UPLOAD_BYTES_TOTAL, WRITE_CACHE_DOWNLOAD_BYTES_TOTAL, WRITE_CACHE_DOWNLOAD_ELAPSED,
|
||||
};
|
||||
use crate::sst::file::RegionFileId;
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
@@ -108,11 +107,8 @@ impl WriteCache {
|
||||
write_request: SstWriteRequest,
|
||||
upload_request: SstUploadRequest,
|
||||
write_opts: &WriteOptions,
|
||||
) -> Result<SstInfoArray> {
|
||||
let timer = FLUSH_ELAPSED
|
||||
.with_label_values(&["write_sst"])
|
||||
.start_timer();
|
||||
|
||||
write_type: WriteType,
|
||||
) -> Result<(SstInfoArray, Metrics)> {
|
||||
let region_id = write_request.metadata.region_id;
|
||||
|
||||
let store = self.file_cache.local_store();
|
||||
@@ -138,6 +134,7 @@ impl WriteCache {
|
||||
write_request.metadata,
|
||||
indexer,
|
||||
path_provider.clone(),
|
||||
Metrics::new(write_type),
|
||||
)
|
||||
.await
|
||||
.with_file_cleaner(cleaner);
|
||||
@@ -145,12 +142,11 @@ impl WriteCache {
|
||||
let sst_info = writer
|
||||
.write_all(write_request.source, write_request.max_sequence, write_opts)
|
||||
.await?;
|
||||
|
||||
timer.stop_and_record();
|
||||
let mut metrics = writer.into_metrics();
|
||||
|
||||
// Upload sst file to remote object store.
|
||||
if sst_info.is_empty() {
|
||||
return Ok(sst_info);
|
||||
return Ok((sst_info, metrics));
|
||||
}
|
||||
|
||||
let mut upload_tracker = UploadTracker::new(region_id);
|
||||
@@ -161,10 +157,12 @@ impl WriteCache {
|
||||
let parquet_path = upload_request
|
||||
.dest_path_provider
|
||||
.build_sst_file_path(RegionFileId::new(region_id, sst.file_id));
|
||||
let start = Instant::now();
|
||||
if let Err(e) = self.upload(parquet_key, &parquet_path, remote_store).await {
|
||||
err = Some(e);
|
||||
break;
|
||||
}
|
||||
metrics.upload_parquet += start.elapsed();
|
||||
upload_tracker.push_uploaded_file(parquet_path);
|
||||
|
||||
if sst.index_metadata.file_size > 0 {
|
||||
@@ -172,10 +170,12 @@ impl WriteCache {
|
||||
let puffin_path = upload_request
|
||||
.dest_path_provider
|
||||
.build_index_file_path(RegionFileId::new(region_id, sst.file_id));
|
||||
let start = Instant::now();
|
||||
if let Err(e) = self.upload(puffin_key, &puffin_path, remote_store).await {
|
||||
err = Some(e);
|
||||
break;
|
||||
}
|
||||
metrics.upload_puffin += start.elapsed();
|
||||
upload_tracker.push_uploaded_file(puffin_path);
|
||||
}
|
||||
}
|
||||
@@ -188,7 +188,7 @@ impl WriteCache {
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
Ok(sst_info)
|
||||
Ok((sst_info, metrics))
|
||||
}
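Timing in this change follows one pattern: the old per-upload histogram timers are replaced by sampling `Instant::now()` around each stage and accumulating the elapsed time into the matching `Metrics` field. A minimal standalone illustration of that pattern:

use std::time::{Duration, Instant};

fn main() {
    let mut upload_parquet = Duration::ZERO;

    // Around each stage: take a start time, do the work, accumulate the elapsed time.
    let start = Instant::now();
    std::thread::sleep(Duration::from_millis(5)); // stand-in for the actual parquet upload
    upload_parquet += start.elapsed();

    println!("upload_parquet took {:?}", upload_parquet);
}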
/// Removes a file from the cache by `index_key`.
|
||||
@@ -298,13 +298,7 @@ impl WriteCache {
|
||||
let file_type = index_key.file_type;
|
||||
let cache_path = self.file_cache.cache_file_path(index_key);
|
||||
|
||||
let timer = FLUSH_ELAPSED
|
||||
.with_label_values(&[match file_type {
|
||||
FileType::Parquet => "upload_parquet",
|
||||
FileType::Puffin => "upload_puffin",
|
||||
}])
|
||||
.start_timer();
|
||||
|
||||
let start = Instant::now();
|
||||
let cached_value = self
|
||||
.file_cache
|
||||
.local_store()
|
||||
@@ -348,11 +342,11 @@ impl WriteCache {
|
||||
UPLOAD_BYTES_TOTAL.inc_by(bytes_written);
|
||||
|
||||
debug!(
|
||||
"Successfully upload file to remote, region: {}, file: {}, upload_path: {}, cost: {:?}s",
|
||||
"Successfully upload file to remote, region: {}, file: {}, upload_path: {}, cost: {:?}",
|
||||
region_id,
|
||||
file_id,
|
||||
upload_path,
|
||||
timer.stop_and_record()
|
||||
start.elapsed(),
|
||||
);
|
||||
|
||||
let index_value = IndexValue {
|
||||
@@ -496,11 +490,11 @@ mod tests {
|
||||
};
|
||||
|
||||
// Write to cache and upload sst to mock remote store
|
||||
let sst_info = write_cache
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts)
|
||||
let (mut sst_infos, _) = write_cache
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts, WriteType::Flush)
|
||||
.await
|
||||
.unwrap()
|
||||
.remove(0); //todo(hl): we assume it only creates one file.
|
||||
.unwrap();
|
||||
let sst_info = sst_infos.remove(0);
|
||||
|
||||
let file_id = sst_info.file_id;
|
||||
let sst_upload_path =
|
||||
@@ -591,11 +585,11 @@ mod tests {
|
||||
remote_store: mock_store.clone(),
|
||||
};
|
||||
|
||||
let sst_info = write_cache
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts)
|
||||
let (mut sst_infos, _) = write_cache
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts, WriteType::Flush)
|
||||
.await
|
||||
.unwrap()
|
||||
.remove(0);
|
||||
.unwrap();
|
||||
let sst_info = sst_infos.remove(0);
|
||||
let write_parquet_metadata = sst_info.file_metadata.unwrap();
|
||||
|
||||
// Read metadata from write cache
|
||||
@@ -671,7 +665,7 @@ mod tests {
|
||||
};
|
||||
|
||||
write_cache
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts)
|
||||
.write_and_upload_sst(write_request, upload_request, &write_opts, WriteType::Flush)
|
||||
.await
|
||||
.unwrap_err();
|
||||
let atomic_write_dir = write_cache_dir.path().join(ATOMIC_WRITE_DIR);
|
||||
|
||||
@@ -29,7 +29,7 @@ use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_request::PathType;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::access_layer::{AccessLayer, AccessLayerRef, OperationType, SstWriteRequest};
|
||||
use crate::access_layer::{AccessLayer, AccessLayerRef, OperationType, SstWriteRequest, WriteType};
|
||||
use crate::cache::{CacheManager, CacheManagerRef};
|
||||
use crate::compaction::picker::{new_picker, PickerOutput};
|
||||
use crate::compaction::{find_ttl, CompactionSstReaderBuilder};
|
||||
@@ -352,7 +352,7 @@ impl Compactor for DefaultCompactor {
|
||||
}
|
||||
.build_sst_reader()
|
||||
.await?;
|
||||
let output_files = sst_layer
|
||||
let (sst_infos, metrics) = sst_layer
|
||||
.write_sst(
|
||||
SstWriteRequest {
|
||||
op_type: OperationType::Compact,
|
||||
@@ -367,8 +367,10 @@ impl Compactor for DefaultCompactor {
|
||||
bloom_filter_index_config,
|
||||
},
|
||||
&write_opts,
|
||||
WriteType::Compaction,
|
||||
)
|
||||
.await?
|
||||
.await?;
|
||||
let output_files = sst_infos
|
||||
.into_iter()
|
||||
.map(|sst_info| FileMeta {
|
||||
region_id,
|
||||
@@ -386,9 +388,10 @@ impl Compactor for DefaultCompactor {
|
||||
let output_file_names =
|
||||
output_files.iter().map(|f| f.file_id.to_string()).join(",");
|
||||
info!(
|
||||
"Region {} compaction inputs: [{}], outputs: [{}]",
|
||||
region_id, input_file_names, output_file_names
|
||||
"Region {} compaction inputs: [{}], outputs: [{}], metrics: {:?}",
|
||||
region_id, input_file_names, output_file_names, metrics
|
||||
);
|
||||
metrics.observe();
|
||||
Ok(output_files)
|
||||
});
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ use store_api::storage::RegionId;
|
||||
use strum::IntoStaticStr;
|
||||
use tokio::sync::{mpsc, watch};
|
||||
|
||||
use crate::access_layer::{AccessLayerRef, OperationType, SstWriteRequest};
|
||||
use crate::access_layer::{AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType};
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::{
|
||||
@@ -345,6 +345,7 @@ impl RegionFlushTask {
|
||||
let mut file_metas = Vec::with_capacity(memtables.len());
|
||||
let mut flushed_bytes = 0;
|
||||
let mut series_count = 0;
|
||||
let mut flush_metrics = Metrics::new(WriteType::Flush);
|
||||
for mem in memtables {
|
||||
if mem.is_empty() {
|
||||
// Skip empty memtables.
|
||||
@@ -399,14 +400,15 @@ impl RegionFlushTask {
|
||||
bloom_filter_index_config: self.engine_config.bloom_filter_index.clone(),
|
||||
};
|
||||
|
||||
let ssts_written = self
|
||||
let (ssts_written, metrics) = self
|
||||
.access_layer
|
||||
.write_sst(write_request, &write_opts)
|
||||
.write_sst(write_request, &write_opts, WriteType::Flush)
|
||||
.await?;
|
||||
if ssts_written.is_empty() {
|
||||
// No data written.
|
||||
continue;
|
||||
}
|
||||
flush_metrics = flush_metrics.merge(metrics);
|
||||
|
||||
file_metas.extend(ssts_written.into_iter().map(|sst_info| {
|
||||
flushed_bytes += sst_info.file_size;
|
||||
@@ -431,13 +433,15 @@ impl RegionFlushTask {
|
||||
|
||||
let file_ids: Vec<_> = file_metas.iter().map(|f| f.file_id).collect();
|
||||
info!(
|
||||
"Successfully flush memtables, region: {}, reason: {}, files: {:?}, series count: {}, cost: {:?}s",
|
||||
"Successfully flush memtables, region: {}, reason: {}, files: {:?}, series count: {}, cost: {:?}, metrics: {:?}",
|
||||
self.region_id,
|
||||
self.reason.as_str(),
|
||||
file_ids,
|
||||
series_count,
|
||||
timer.stop_and_record(),
|
||||
flush_metrics,
|
||||
);
|
||||
flush_metrics.observe();
|
||||
|
||||
let edit = RegionEdit {
|
||||
files_to_add: file_metas,
|
||||
|
||||
@@ -204,6 +204,12 @@ pub type MemtableRef = Arc<dyn Memtable>;
pub trait MemtableBuilder: Send + Sync + fmt::Debug {
/// Builds a new memtable instance.
fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef;

/// Returns true if the memtable supports bulk insert and benefits from it.
fn use_bulk_insert(&self, metadata: &RegionMetadataRef) -> bool {
let _metadata = metadata;
false
}
}
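The new `use_bulk_insert` hook defaults to `false`, so existing builders keep the row-by-row path; a builder opts in per region metadata. A hedged sketch of an opt-in implementation (`BulkMemtableBuilder` and `BulkMemtable` are hypothetical names, not part of this diff; later hunks show `PartitionTreeMemtableBuilder` keeping `false` and the test-only `EmptyMemtableBuilder` returning `true`):

/// Illustrative only: a builder that opts into bulk insert for every region.
#[derive(Debug, Default)]
struct BulkMemtableBuilder;

impl MemtableBuilder for BulkMemtableBuilder {
    fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
        // `BulkMemtable` is a stand-in for a memtable that stores BulkPart batches.
        Arc::new(BulkMemtable::new(id, metadata.clone()))
    }

    fn use_bulk_insert(&self, _metadata: &RegionMetadataRef) -> bool {
        // Opting in makes TimePartitions convert incoming KeyValues into a BulkPart
        // before writing (see the TimePartitions::write hunk later in this diff).
        true
    }
}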
pub type MemtableBuilderRef = Arc<dyn MemtableBuilder>;
|
||||
|
||||
@@ -29,7 +29,7 @@ use crate::memtable::{
|
||||
#[allow(unused)]
|
||||
mod context;
|
||||
#[allow(unused)]
|
||||
pub(crate) mod part;
|
||||
pub mod part;
|
||||
mod part_reader;
|
||||
mod row_group_reader;
|
||||
|
||||
|
||||
@@ -24,41 +24,49 @@ use bytes::Bytes;
|
||||
use common_grpc::flight::{FlightDecoder, FlightEncoder, FlightMessage};
|
||||
use common_recordbatch::DfRecordBatch as RecordBatch;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion::arrow::array::{TimestampNanosecondArray, UInt64Builder};
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::{
|
||||
Array, ArrayRef, BinaryBuilder, DictionaryArray, TimestampMicrosecondArray,
|
||||
TimestampMillisecondArray, TimestampSecondArray, UInt32Array, UInt64Array, UInt8Array,
|
||||
UInt8Builder,
|
||||
Array, ArrayRef, BinaryBuilder, BinaryDictionaryBuilder, DictionaryArray, StringBuilder,
|
||||
StringDictionaryBuilder, TimestampMicrosecondArray, TimestampMillisecondArray,
|
||||
TimestampNanosecondArray, TimestampSecondArray, UInt32Array, UInt64Array, UInt64Builder,
|
||||
UInt8Array, UInt8Builder,
|
||||
};
|
||||
use datatypes::arrow::compute::TakeOptions;
|
||||
use datatypes::arrow::datatypes::SchemaRef;
|
||||
use datatypes::arrow::compute::{SortColumn, SortOptions, TakeOptions};
|
||||
use datatypes::arrow::datatypes::{SchemaRef, UInt32Type};
|
||||
use datatypes::arrow_array::BinaryArray;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::{MutableVector, ScalarVectorBuilder, Vector};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::value::{Value, ValueRef};
|
||||
use datatypes::vectors::Helper;
|
||||
use mito_codec::key_values::{KeyValue, KeyValuesRef};
|
||||
use mito_codec::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodec, PrimaryKeyCodecExt};
|
||||
use mito_codec::key_values::{KeyValue, KeyValues, KeyValuesRef};
|
||||
use mito_codec::row_converter::{
|
||||
build_primary_key_codec, DensePrimaryKeyCodec, PrimaryKeyCodec, PrimaryKeyCodecExt,
|
||||
};
|
||||
use parquet::arrow::ArrowWriter;
|
||||
use parquet::data_type::AsBytes;
|
||||
use parquet::file::metadata::ParquetMetaData;
|
||||
use parquet::file::properties::WriterProperties;
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::metadata::{RegionMetadata, RegionMetadataRef};
|
||||
use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME;
|
||||
use store_api::storage::SequenceNumber;
|
||||
use table::predicate::Predicate;
|
||||
|
||||
use crate::error::{
|
||||
self, ComputeArrowSnafu, EncodeMemtableSnafu, EncodeSnafu, NewRecordBatchSnafu, Result,
|
||||
self, ColumnNotFoundSnafu, ComputeArrowSnafu, DataTypeMismatchSnafu, EncodeMemtableSnafu,
|
||||
EncodeSnafu, NewRecordBatchSnafu, Result,
|
||||
};
|
||||
use crate::memtable::bulk::context::BulkIterContextRef;
|
||||
use crate::memtable::bulk::part_reader::BulkPartIter;
|
||||
use crate::memtable::time_series::{ValueBuilder, Values};
|
||||
use crate::memtable::BoxedBatchIterator;
|
||||
use crate::sst::parquet::format::{PrimaryKeyArray, ReadFormat};
|
||||
use crate::sst::parquet::helper::parse_parquet_metadata;
|
||||
use crate::sst::to_sst_arrow_schema;
|
||||
|
||||
const INIT_DICT_VALUE_CAPACITY: usize = 8;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BulkPart {
|
||||
pub batch: RecordBatch,
|
||||
@@ -209,6 +217,281 @@ impl BulkPart {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder type for primary key dictionary array.
|
||||
type PrimaryKeyArrayBuilder = BinaryDictionaryBuilder<UInt32Type>;
|
||||
|
||||
/// Primary key column builder for handling strings specially.
|
||||
enum PrimaryKeyColumnBuilder {
|
||||
/// String dictionary builder for string types.
|
||||
StringDict(StringDictionaryBuilder<UInt32Type>),
|
||||
/// Generic mutable vector for other types.
|
||||
Vector(Box<dyn MutableVector>),
|
||||
}
|
||||
|
||||
impl PrimaryKeyColumnBuilder {
|
||||
/// Appends a value to the builder.
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match self {
|
||||
PrimaryKeyColumnBuilder::StringDict(builder) => {
|
||||
if let Some(s) = value.as_string().context(DataTypeMismatchSnafu)? {
|
||||
// We know the value is a string.
|
||||
builder.append_value(s);
|
||||
} else {
|
||||
builder.append_null();
|
||||
}
|
||||
}
|
||||
PrimaryKeyColumnBuilder::Vector(builder) => {
|
||||
builder.push_value_ref(value);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Converts the builder to an ArrayRef.
|
||||
fn into_arrow_array(self) -> ArrayRef {
|
||||
match self {
|
||||
PrimaryKeyColumnBuilder::StringDict(mut builder) => Arc::new(builder.finish()),
|
||||
PrimaryKeyColumnBuilder::Vector(mut builder) => builder.to_vector().to_arrow_array(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converter that converts [KeyValues] into a [BulkPart].
|
||||
pub struct BulkPartConverter {
|
||||
/// Region metadata.
|
||||
region_metadata: RegionMetadataRef,
|
||||
/// Schema of the converted batch.
|
||||
schema: SchemaRef,
|
||||
/// Primary key codec for encoding keys
|
||||
primary_key_codec: Arc<dyn PrimaryKeyCodec>,
|
||||
/// Buffer for encoding primary key.
|
||||
key_buf: Vec<u8>,
|
||||
/// Primary key array builder.
|
||||
key_array_builder: PrimaryKeyArrayBuilder,
|
||||
/// Builders for non-primary key columns.
|
||||
value_builder: ValueBuilder,
|
||||
/// Builders for individual primary key columns.
|
||||
/// The order of builders is the same as the order of primary key columns in the region metadata.
|
||||
primary_key_column_builders: Vec<PrimaryKeyColumnBuilder>,
|
||||
|
||||
/// Max timestamp value.
|
||||
max_ts: i64,
|
||||
/// Min timestamp value.
|
||||
min_ts: i64,
|
||||
/// Max sequence number.
|
||||
max_sequence: SequenceNumber,
|
||||
}
|
||||
|
||||
impl BulkPartConverter {
|
||||
/// Creates a new converter.
|
||||
///
|
||||
/// If `store_primary_key_columns` is true and the encoding is not sparse, it
/// additionally stores the primary key columns as individual arrays.
|
||||
pub fn new(
|
||||
region_metadata: &RegionMetadataRef,
|
||||
schema: SchemaRef,
|
||||
capacity: usize,
|
||||
primary_key_codec: Arc<dyn PrimaryKeyCodec>,
|
||||
store_primary_key_columns: bool,
|
||||
) -> Self {
|
||||
debug_assert_eq!(
|
||||
region_metadata.primary_key_encoding,
|
||||
primary_key_codec.encoding()
|
||||
);
|
||||
|
||||
let primary_key_column_builders = if store_primary_key_columns
|
||||
&& region_metadata.primary_key_encoding != PrimaryKeyEncoding::Sparse
|
||||
{
|
||||
new_primary_key_column_builders(region_metadata, capacity)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Self {
|
||||
region_metadata: region_metadata.clone(),
|
||||
schema,
|
||||
primary_key_codec,
|
||||
key_buf: Vec::new(),
|
||||
key_array_builder: PrimaryKeyArrayBuilder::new(),
|
||||
value_builder: ValueBuilder::new(region_metadata, capacity),
|
||||
primary_key_column_builders,
|
||||
min_ts: i64::MAX,
|
||||
max_ts: i64::MIN,
|
||||
max_sequence: SequenceNumber::MIN,
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a [KeyValues] into the converter.
|
||||
pub fn append_key_values(&mut self, key_values: &KeyValues) -> Result<()> {
|
||||
for kv in key_values.iter() {
|
||||
self.append_key_value(&kv)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Appends a [KeyValue] to builders.
|
||||
///
|
||||
/// If the primary key uses sparse encoding, callers must encode the primary key in the [KeyValue].
|
||||
fn append_key_value(&mut self, kv: &KeyValue) -> Result<()> {
|
||||
// Handles primary key based on encoding type
|
||||
if self.primary_key_codec.encoding() == PrimaryKeyEncoding::Sparse {
|
||||
// For sparse encoding, the primary key is already encoded in the KeyValue
|
||||
// Gets the first (and only) primary key value which contains the encoded key
|
||||
let mut primary_keys = kv.primary_keys();
|
||||
if let Some(encoded) = primary_keys
|
||||
.next()
|
||||
.context(ColumnNotFoundSnafu {
|
||||
column: PRIMARY_KEY_COLUMN_NAME,
|
||||
})?
|
||||
.as_binary()
|
||||
.context(DataTypeMismatchSnafu)?
|
||||
{
|
||||
self.key_array_builder
|
||||
.append(encoded)
|
||||
.context(ComputeArrowSnafu)?;
|
||||
} else {
|
||||
self.key_array_builder
|
||||
.append("")
|
||||
.context(ComputeArrowSnafu)?;
|
||||
}
|
||||
} else {
|
||||
// For dense encoding, we need to encode the primary key columns
|
||||
self.key_buf.clear();
|
||||
self.primary_key_codec
|
||||
.encode_key_value(kv, &mut self.key_buf)
|
||||
.context(EncodeSnafu)?;
|
||||
self.key_array_builder
|
||||
.append(&self.key_buf)
|
||||
.context(ComputeArrowSnafu)?;
|
||||
};
|
||||
|
||||
// If storing primary key columns, append values to individual builders
|
||||
if !self.primary_key_column_builders.is_empty() {
|
||||
for (builder, pk_value) in self
|
||||
.primary_key_column_builders
|
||||
.iter_mut()
|
||||
.zip(kv.primary_keys())
|
||||
{
|
||||
builder.push_value_ref(pk_value)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Pushes other columns.
|
||||
self.value_builder.push(
|
||||
kv.timestamp(),
|
||||
kv.sequence(),
|
||||
kv.op_type() as u8,
|
||||
kv.fields(),
|
||||
);
|
||||
|
||||
// Updates statistics
|
||||
// Safety: timestamp of kv must be both present and a valid timestamp value.
|
||||
let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
|
||||
self.min_ts = self.min_ts.min(ts);
|
||||
self.max_ts = self.max_ts.max(ts);
|
||||
self.max_sequence = self.max_sequence.max(kv.sequence());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Converts buffered content into a [BulkPart].
|
||||
///
|
||||
/// It sorts the record batch by (primary key, timestamp, sequence desc).
|
||||
pub fn convert(mut self) -> Result<BulkPart> {
|
||||
let values = Values::from(self.value_builder);
|
||||
let mut columns =
|
||||
Vec::with_capacity(4 + values.fields.len() + self.primary_key_column_builders.len());
|
||||
|
||||
// Build primary key column arrays if enabled.
|
||||
for builder in self.primary_key_column_builders {
|
||||
columns.push(builder.into_arrow_array());
|
||||
}
|
||||
// Then fields columns.
|
||||
columns.extend(values.fields.iter().map(|field| field.to_arrow_array()));
|
||||
// Time index.
|
||||
let timestamp_index = columns.len();
|
||||
columns.push(values.timestamp.to_arrow_array());
|
||||
// Primary key.
|
||||
let pk_array = self.key_array_builder.finish();
|
||||
columns.push(Arc::new(pk_array));
|
||||
// Sequence and op type.
|
||||
columns.push(values.sequence.to_arrow_array());
|
||||
columns.push(values.op_type.to_arrow_array());
|
||||
|
||||
let batch = RecordBatch::try_new(self.schema, columns).context(NewRecordBatchSnafu)?;
|
||||
// Sorts the record batch.
|
||||
let batch = sort_primary_key_record_batch(&batch)?;
|
||||
|
||||
Ok(BulkPart {
|
||||
batch,
|
||||
max_ts: self.max_ts,
|
||||
min_ts: self.min_ts,
|
||||
sequence: self.max_sequence,
|
||||
timestamp_index,
|
||||
raw_data: None,
|
||||
})
|
||||
}
|
||||
}
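Read together with the tests below, the intended flow of the converter is: build the flat SST schema, feed one or more `KeyValues`, then call `convert` to get a `BulkPart` sorted by (primary key, timestamp, sequence desc). A condensed sketch using only names from this change (crate-local imports elided):

/// Sketch of the conversion flow; mirrors test_bulk_part_converter_append_and_convert below.
fn key_values_to_bulk_part(metadata: &RegionMetadataRef, kvs: &KeyValues) -> Result<BulkPart> {
    let codec = build_primary_key_codec(metadata);
    let schema = to_flat_sst_arrow_schema(
        metadata,
        &FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
    );
    // Store raw primary key columns when the encoding allows it (dense encoding).
    let mut converter = BulkPartConverter::new(metadata, schema, kvs.num_rows(), codec, true);
    converter.append_key_values(kvs)?;
    // Sorts the batch by (primary key, timestamp, sequence desc) before returning.
    converter.convert()
}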
fn new_primary_key_column_builders(
|
||||
metadata: &RegionMetadata,
|
||||
capacity: usize,
|
||||
) -> Vec<PrimaryKeyColumnBuilder> {
|
||||
metadata
|
||||
.primary_key_columns()
|
||||
.map(|col| {
|
||||
if col.column_schema.data_type.is_string() {
|
||||
PrimaryKeyColumnBuilder::StringDict(StringDictionaryBuilder::with_capacity(
|
||||
capacity,
|
||||
INIT_DICT_VALUE_CAPACITY,
|
||||
capacity,
|
||||
))
|
||||
} else {
|
||||
PrimaryKeyColumnBuilder::Vector(
|
||||
col.column_schema.data_type.create_mutable_vector(capacity),
|
||||
)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Sorts the record batch with primary key format.
|
||||
fn sort_primary_key_record_batch(batch: &RecordBatch) -> Result<RecordBatch> {
|
||||
let total_columns = batch.num_columns();
|
||||
let sort_columns = vec![
|
||||
// Primary key column (ascending)
|
||||
SortColumn {
|
||||
values: batch.column(total_columns - 3).clone(),
|
||||
options: Some(SortOptions {
|
||||
descending: false,
|
||||
nulls_first: true,
|
||||
}),
|
||||
},
|
||||
// Time index column (ascending)
|
||||
SortColumn {
|
||||
values: batch.column(total_columns - 4).clone(),
|
||||
options: Some(SortOptions {
|
||||
descending: false,
|
||||
nulls_first: true,
|
||||
}),
|
||||
},
|
||||
// Sequence column (descending)
|
||||
SortColumn {
|
||||
values: batch.column(total_columns - 2).clone(),
|
||||
options: Some(SortOptions {
|
||||
descending: true,
|
||||
nulls_first: true,
|
||||
}),
|
||||
},
|
||||
];
|
||||
|
||||
let indices = datatypes::arrow::compute::lexsort_to_indices(&sort_columns, None)
|
||||
.context(ComputeArrowSnafu)?;
|
||||
|
||||
datatypes::arrow::compute::take_record_batch(batch, &indices).context(ComputeArrowSnafu)
|
||||
}
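With the flat schema layout built above (primary key columns, field columns, time index, `__primary_key`, `__sequence`, `__op_type`), the sort addresses its columns from the end of the batch. A small worked check, assuming the eight-column test schema used further down:

// For the test schema [k0, k1, v0, v1, ts, __primary_key, __sequence, __op_type]:
let total_columns = 8;
assert_eq!(total_columns - 3, 5); // __primary_key, sorted ascending
assert_eq!(total_columns - 4, 4); // ts (time index), sorted ascending
assert_eq!(total_columns - 2, 6); // __sequence, sorted descending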
#[derive(Debug)]
|
||||
pub struct EncodedBulkPart {
|
||||
data: Bytes,
|
||||
@@ -596,14 +879,20 @@ fn binary_array_to_dictionary(input: &BinaryArray) -> Result<PrimaryKeyArray> {
|
||||
mod tests {
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use api::v1::{Row, WriteHint};
|
||||
use datafusion_common::ScalarValue;
|
||||
use datatypes::prelude::{ScalarVector, Value};
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVector, Value};
|
||||
use datatypes::vectors::{Float64Vector, TimestampMillisecondVector};
|
||||
use store_api::storage::consts::ReservedColumnId;
|
||||
|
||||
use super::*;
|
||||
use crate::memtable::bulk::context::BulkIterContext;
|
||||
use crate::sst::parquet::format::ReadFormat;
|
||||
use crate::test_util::memtable_util::{build_key_values_with_ts_seq_values, metadata_for_test};
|
||||
use crate::sst::{to_flat_sst_arrow_schema, FlatSchemaOptions};
|
||||
use crate::test_util::memtable_util::{
|
||||
build_key_values_with_ts_seq_values, metadata_for_test, metadata_with_primary_key,
|
||||
region_metadata_to_row_schema,
|
||||
};
|
||||
|
||||
fn check_binary_array_to_dictionary(
|
||||
input: &[&[u8]],
|
||||
@@ -1084,4 +1373,465 @@ mod tests {
|
||||
1,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bulk_part_converter_append_and_convert() {
|
||||
let metadata = metadata_for_test();
|
||||
let capacity = 100;
|
||||
let primary_key_codec = build_primary_key_codec(&metadata);
|
||||
let schema = to_flat_sst_arrow_schema(
|
||||
&metadata,
|
||||
&FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
|
||||
);
|
||||
|
||||
let mut converter =
|
||||
BulkPartConverter::new(&metadata, schema, capacity, primary_key_codec, true);
|
||||
|
||||
let key_values1 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"key1".to_string(),
|
||||
1u32,
|
||||
vec![1000, 2000].into_iter(),
|
||||
vec![Some(1.0), Some(2.0)].into_iter(),
|
||||
1,
|
||||
);
|
||||
|
||||
let key_values2 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"key2".to_string(),
|
||||
2u32,
|
||||
vec![1500].into_iter(),
|
||||
vec![Some(3.0)].into_iter(),
|
||||
2,
|
||||
);
|
||||
|
||||
converter.append_key_values(&key_values1).unwrap();
|
||||
converter.append_key_values(&key_values2).unwrap();
|
||||
|
||||
let bulk_part = converter.convert().unwrap();
|
||||
|
||||
assert_eq!(bulk_part.num_rows(), 3);
|
||||
assert_eq!(bulk_part.min_ts, 1000);
|
||||
assert_eq!(bulk_part.max_ts, 2000);
|
||||
assert_eq!(bulk_part.sequence, 2);
|
||||
assert_eq!(bulk_part.timestamp_index, bulk_part.batch.num_columns() - 4);
|
||||
|
||||
// Validate primary key columns are stored
|
||||
// Schema should include primary key columns k0 and k1 at the beginning
|
||||
let schema = bulk_part.batch.schema();
|
||||
let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
|
||||
assert_eq!(
|
||||
field_names,
|
||||
vec![
|
||||
"k0",
|
||||
"k1",
|
||||
"v0",
|
||||
"v1",
|
||||
"ts",
|
||||
"__primary_key",
|
||||
"__sequence",
|
||||
"__op_type"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bulk_part_converter_sorting() {
|
||||
let metadata = metadata_for_test();
|
||||
let capacity = 100;
|
||||
let primary_key_codec = build_primary_key_codec(&metadata);
|
||||
let schema = to_flat_sst_arrow_schema(
|
||||
&metadata,
|
||||
&FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
|
||||
);
|
||||
|
||||
let mut converter =
|
||||
BulkPartConverter::new(&metadata, schema, capacity, primary_key_codec, true);
|
||||
|
||||
let key_values1 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"z_key".to_string(),
|
||||
3u32,
|
||||
vec![3000].into_iter(),
|
||||
vec![Some(3.0)].into_iter(),
|
||||
3,
|
||||
);
|
||||
|
||||
let key_values2 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"a_key".to_string(),
|
||||
1u32,
|
||||
vec![1000].into_iter(),
|
||||
vec![Some(1.0)].into_iter(),
|
||||
1,
|
||||
);
|
||||
|
||||
let key_values3 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"m_key".to_string(),
|
||||
2u32,
|
||||
vec![2000].into_iter(),
|
||||
vec![Some(2.0)].into_iter(),
|
||||
2,
|
||||
);
|
||||
|
||||
converter.append_key_values(&key_values1).unwrap();
|
||||
converter.append_key_values(&key_values2).unwrap();
|
||||
converter.append_key_values(&key_values3).unwrap();
|
||||
|
||||
let bulk_part = converter.convert().unwrap();
|
||||
|
||||
assert_eq!(bulk_part.num_rows(), 3);
|
||||
|
||||
let ts_column = bulk_part.batch.column(bulk_part.timestamp_index);
|
||||
let seq_column = bulk_part.batch.column(bulk_part.batch.num_columns() - 2);
|
||||
|
||||
let ts_array = ts_column
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
let seq_array = seq_column.as_any().downcast_ref::<UInt64Array>().unwrap();
|
||||
|
||||
assert_eq!(ts_array.values(), &[1000, 2000, 3000]);
|
||||
assert_eq!(seq_array.values(), &[1, 2, 3]);
|
||||
|
||||
// Validate primary key columns are stored
|
||||
let schema = bulk_part.batch.schema();
|
||||
let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
|
||||
assert_eq!(
|
||||
field_names,
|
||||
vec![
|
||||
"k0",
|
||||
"k1",
|
||||
"v0",
|
||||
"v1",
|
||||
"ts",
|
||||
"__primary_key",
|
||||
"__sequence",
|
||||
"__op_type"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bulk_part_converter_empty() {
|
||||
let metadata = metadata_for_test();
|
||||
let capacity = 10;
|
||||
let primary_key_codec = build_primary_key_codec(&metadata);
|
||||
let schema = to_flat_sst_arrow_schema(
|
||||
&metadata,
|
||||
&FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
|
||||
);
|
||||
|
||||
let converter =
|
||||
BulkPartConverter::new(&metadata, schema, capacity, primary_key_codec, true);
|
||||
|
||||
let bulk_part = converter.convert().unwrap();
|
||||
|
||||
assert_eq!(bulk_part.num_rows(), 0);
|
||||
assert_eq!(bulk_part.min_ts, i64::MAX);
|
||||
assert_eq!(bulk_part.max_ts, i64::MIN);
|
||||
assert_eq!(bulk_part.sequence, SequenceNumber::MIN);
|
||||
|
||||
// Validate primary key columns are present in schema even for empty batch
|
||||
let schema = bulk_part.batch.schema();
|
||||
let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
|
||||
assert_eq!(
|
||||
field_names,
|
||||
vec![
|
||||
"k0",
|
||||
"k1",
|
||||
"v0",
|
||||
"v1",
|
||||
"ts",
|
||||
"__primary_key",
|
||||
"__sequence",
|
||||
"__op_type"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bulk_part_converter_without_primary_key_columns() {
|
||||
let metadata = metadata_for_test();
|
||||
let primary_key_codec = build_primary_key_codec(&metadata);
|
||||
let schema = to_flat_sst_arrow_schema(
|
||||
&metadata,
|
||||
&FlatSchemaOptions {
|
||||
raw_pk_columns: false,
|
||||
string_pk_use_dict: true,
|
||||
},
|
||||
);
|
||||
|
||||
let capacity = 100;
|
||||
let mut converter =
|
||||
BulkPartConverter::new(&metadata, schema, capacity, primary_key_codec, false);
|
||||
|
||||
let key_values1 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"key1".to_string(),
|
||||
1u32,
|
||||
vec![1000, 2000].into_iter(),
|
||||
vec![Some(1.0), Some(2.0)].into_iter(),
|
||||
1,
|
||||
);
|
||||
|
||||
let key_values2 = build_key_values_with_ts_seq_values(
|
||||
&metadata,
|
||||
"key2".to_string(),
|
||||
2u32,
|
||||
vec![1500].into_iter(),
|
||||
vec![Some(3.0)].into_iter(),
|
||||
2,
|
||||
);
|
||||
|
||||
converter.append_key_values(&key_values1).unwrap();
|
||||
converter.append_key_values(&key_values2).unwrap();
|
||||
|
||||
let bulk_part = converter.convert().unwrap();
|
||||
|
||||
assert_eq!(bulk_part.num_rows(), 3);
|
||||
assert_eq!(bulk_part.min_ts, 1000);
|
||||
assert_eq!(bulk_part.max_ts, 2000);
|
||||
assert_eq!(bulk_part.sequence, 2);
|
||||
assert_eq!(bulk_part.timestamp_index, bulk_part.batch.num_columns() - 4);
|
||||
|
||||
// Validate primary key columns are NOT stored individually
|
||||
let schema = bulk_part.batch.schema();
|
||||
let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
|
||||
assert_eq!(
|
||||
field_names,
|
||||
vec!["v0", "v1", "ts", "__primary_key", "__sequence", "__op_type"]
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn build_key_values_with_sparse_encoding(
|
||||
metadata: &RegionMetadataRef,
|
||||
primary_key_codec: &Arc<dyn PrimaryKeyCodec>,
|
||||
table_id: u32,
|
||||
tsid: u64,
|
||||
k0: String,
|
||||
k1: String,
|
||||
timestamps: impl Iterator<Item = i64>,
|
||||
values: impl Iterator<Item = Option<f64>>,
|
||||
sequence: SequenceNumber,
|
||||
) -> KeyValues {
|
||||
// Encode the primary key (__table_id, __tsid, k0, k1) into binary format using the sparse codec
|
||||
let pk_values = vec![
|
||||
(ReservedColumnId::table_id(), Value::UInt32(table_id)),
|
||||
(ReservedColumnId::tsid(), Value::UInt64(tsid)),
|
||||
(0, Value::String(k0.clone().into())),
|
||||
(1, Value::String(k1.clone().into())),
|
||||
];
|
||||
let mut encoded_key = Vec::new();
|
||||
primary_key_codec
|
||||
.encode_values(&pk_values, &mut encoded_key)
|
||||
.unwrap();
|
||||
assert!(!encoded_key.is_empty());
|
||||
|
||||
// Create schema for sparse encoding: __primary_key, ts, v0, v1
|
||||
let column_schema = vec![
|
||||
api::v1::ColumnSchema {
|
||||
column_name: PRIMARY_KEY_COLUMN_NAME.to_string(),
|
||||
datatype: api::helper::ColumnDataTypeWrapper::try_from(
|
||||
ConcreteDataType::binary_datatype(),
|
||||
)
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: api::v1::SemanticType::Tag as i32,
|
||||
..Default::default()
|
||||
},
|
||||
api::v1::ColumnSchema {
|
||||
column_name: "ts".to_string(),
|
||||
datatype: api::helper::ColumnDataTypeWrapper::try_from(
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
)
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: api::v1::SemanticType::Timestamp as i32,
|
||||
..Default::default()
|
||||
},
|
||||
api::v1::ColumnSchema {
|
||||
column_name: "v0".to_string(),
|
||||
datatype: api::helper::ColumnDataTypeWrapper::try_from(
|
||||
ConcreteDataType::int64_datatype(),
|
||||
)
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: api::v1::SemanticType::Field as i32,
|
||||
..Default::default()
|
||||
},
|
||||
api::v1::ColumnSchema {
|
||||
column_name: "v1".to_string(),
|
||||
datatype: api::helper::ColumnDataTypeWrapper::try_from(
|
||||
ConcreteDataType::float64_datatype(),
|
||||
)
|
||||
.unwrap()
|
||||
.datatype() as i32,
|
||||
semantic_type: api::v1::SemanticType::Field as i32,
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
|
||||
let rows = timestamps
|
||||
.zip(values)
|
||||
.map(|(ts, v)| Row {
|
||||
values: vec![
|
||||
api::v1::Value {
|
||||
value_data: Some(api::v1::value::ValueData::BinaryValue(
|
||||
encoded_key.clone(),
|
||||
)),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(api::v1::value::ValueData::TimestampMillisecondValue(ts)),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: Some(api::v1::value::ValueData::I64Value(ts)),
|
||||
},
|
||||
api::v1::Value {
|
||||
value_data: v.map(api::v1::value::ValueData::F64Value),
|
||||
},
|
||||
],
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mutation = api::v1::Mutation {
|
||||
op_type: 1,
|
||||
sequence,
|
||||
rows: Some(api::v1::Rows {
|
||||
schema: column_schema,
|
||||
rows,
|
||||
}),
|
||||
write_hint: Some(WriteHint {
|
||||
primary_key_encoding: api::v1::PrimaryKeyEncoding::Sparse.into(),
|
||||
}),
|
||||
};
|
||||
KeyValues::new(metadata.as_ref(), mutation).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bulk_part_converter_sparse_primary_key_encoding() {
|
||||
use api::v1::SemanticType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
let mut builder = RegionMetadataBuilder::new(RegionId::new(123, 456));
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("k0", ConcreteDataType::string_datatype(), false),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 0,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("k1", ConcreteDataType::string_datatype(), false),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 1,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 2,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("v0", ConcreteDataType::int64_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("v1", ConcreteDataType::float64_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 4,
|
||||
})
|
||||
.primary_key(vec![0, 1])
|
||||
.primary_key_encoding(PrimaryKeyEncoding::Sparse);
|
||||
let metadata = Arc::new(builder.build().unwrap());
|
||||
|
||||
let primary_key_codec = build_primary_key_codec(&metadata);
|
||||
let schema = to_flat_sst_arrow_schema(
|
||||
&metadata,
|
||||
&FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
|
||||
);
|
||||
|
||||
assert_eq!(metadata.primary_key_encoding, PrimaryKeyEncoding::Sparse);
|
||||
assert_eq!(primary_key_codec.encoding(), PrimaryKeyEncoding::Sparse);
|
||||
|
||||
let capacity = 100;
|
||||
let mut converter =
|
||||
BulkPartConverter::new(&metadata, schema, capacity, primary_key_codec.clone(), true);
|
||||
|
||||
let key_values1 = build_key_values_with_sparse_encoding(
|
||||
&metadata,
|
||||
&primary_key_codec,
|
||||
2048u32, // table_id
|
||||
100u64, // tsid
|
||||
"key11".to_string(),
|
||||
"key21".to_string(),
|
||||
vec![1000, 2000].into_iter(),
|
||||
vec![Some(1.0), Some(2.0)].into_iter(),
|
||||
1,
|
||||
);
|
||||
|
||||
let key_values2 = build_key_values_with_sparse_encoding(
|
||||
&metadata,
|
||||
&primary_key_codec,
|
||||
4096u32, // table_id
|
||||
200u64, // tsid
|
||||
"key12".to_string(),
|
||||
"key22".to_string(),
|
||||
vec![1500].into_iter(),
|
||||
vec![Some(3.0)].into_iter(),
|
||||
2,
|
||||
);
|
||||
|
||||
converter.append_key_values(&key_values1).unwrap();
|
||||
converter.append_key_values(&key_values2).unwrap();
|
||||
|
||||
let bulk_part = converter.convert().unwrap();
|
||||
|
||||
assert_eq!(bulk_part.num_rows(), 3);
|
||||
assert_eq!(bulk_part.min_ts, 1000);
|
||||
assert_eq!(bulk_part.max_ts, 2000);
|
||||
assert_eq!(bulk_part.sequence, 2);
|
||||
assert_eq!(bulk_part.timestamp_index, bulk_part.batch.num_columns() - 4);
|
||||
|
||||
// For sparse encoding, primary key columns should NOT be stored individually
|
||||
// even when store_primary_key_columns is true, because sparse encoding
|
||||
// stores the encoded primary key in the __primary_key column
|
||||
let schema = bulk_part.batch.schema();
|
||||
let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
|
||||
assert_eq!(
|
||||
field_names,
|
||||
vec!["v0", "v1", "ts", "__primary_key", "__sequence", "__op_type"]
|
||||
);
|
||||
|
||||
// Verify the __primary_key column contains encoded sparse keys
|
||||
let primary_key_column = bulk_part.batch.column_by_name("__primary_key").unwrap();
|
||||
let dict_array = primary_key_column
|
||||
.as_any()
|
||||
.downcast_ref::<DictionaryArray<UInt32Type>>()
|
||||
.unwrap();
|
||||
|
||||
// Should have non-zero entries indicating encoded primary keys
|
||||
assert!(!dict_array.is_empty());
|
||||
assert_eq!(dict_array.len(), 3); // 3 rows total
|
||||
|
||||
// Verify values are properly encoded binary data (not empty)
|
||||
let values = dict_array
|
||||
.values()
|
||||
.as_any()
|
||||
.downcast_ref::<BinaryArray>()
|
||||
.unwrap();
|
||||
for i in 0..values.len() {
|
||||
assert!(
|
||||
!values.value(i).is_empty(),
|
||||
"Encoded primary key should not be empty"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -350,6 +350,10 @@ impl MemtableBuilder for PartitionTreeMemtableBuilder {
|
||||
&self.config,
|
||||
))
|
||||
}
|
||||
|
||||
fn use_bulk_insert(&self, _metadata: &RegionMetadataRef) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
struct PartitionTreeIterBuilder {
|
||||
@@ -373,6 +377,7 @@ impl IterBuilder for PartitionTreeIterBuilder {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{Mutation, OpType, Row, Rows, SemanticType};
|
||||
@@ -402,9 +407,9 @@ mod tests {
|
||||
|
||||
fn write_iter_sorted_input(has_pk: bool) {
|
||||
let metadata = if has_pk {
|
||||
memtable_util::metadata_with_primary_key(vec![1, 0], true)
|
||||
Arc::new(memtable_util::metadata_with_primary_key(vec![1, 0], true))
|
||||
} else {
|
||||
memtable_util::metadata_with_primary_key(vec![], false)
|
||||
Arc::new(memtable_util::metadata_with_primary_key(vec![], false))
|
||||
};
|
||||
let timestamps = (0..100).collect::<Vec<_>>();
|
||||
let kvs =
|
||||
@@ -447,9 +452,9 @@ mod tests {
|
||||
|
||||
fn write_iter_unsorted_input(has_pk: bool) {
|
||||
let metadata = if has_pk {
|
||||
memtable_util::metadata_with_primary_key(vec![1, 0], true)
|
||||
Arc::new(memtable_util::metadata_with_primary_key(vec![1, 0], true))
|
||||
} else {
|
||||
memtable_util::metadata_with_primary_key(vec![], false)
|
||||
Arc::new(memtable_util::metadata_with_primary_key(vec![], false))
|
||||
};
|
||||
let codec = Arc::new(DensePrimaryKeyCodec::new(&metadata));
|
||||
let memtable = PartitionTreeMemtable::new(
|
||||
@@ -512,9 +517,9 @@ mod tests {
|
||||
|
||||
fn write_iter_projection(has_pk: bool) {
|
||||
let metadata = if has_pk {
|
||||
memtable_util::metadata_with_primary_key(vec![1, 0], true)
|
||||
Arc::new(memtable_util::metadata_with_primary_key(vec![1, 0], true))
|
||||
} else {
|
||||
memtable_util::metadata_with_primary_key(vec![], false)
|
||||
Arc::new(memtable_util::metadata_with_primary_key(vec![], false))
|
||||
};
|
||||
// Try to build a memtable via the builder.
|
||||
let memtable = PartitionTreeMemtableBuilder::new(PartitionTreeConfig::default(), None)
|
||||
@@ -552,7 +557,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn write_iter_multi_keys(max_keys: usize, freeze_threshold: usize) {
|
||||
let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
|
||||
let metadata = Arc::new(memtable_util::metadata_with_primary_key(vec![1, 0], true));
|
||||
let codec = Arc::new(DensePrimaryKeyCodec::new(&metadata));
|
||||
let memtable = PartitionTreeMemtable::new(
|
||||
1,
|
||||
@@ -602,7 +607,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_memtable_filter() {
|
||||
let metadata = memtable_util::metadata_with_primary_key(vec![0, 1], false);
|
||||
let metadata = Arc::new(memtable_util::metadata_with_primary_key(vec![0, 1], false));
|
||||
// Try to build a memtable via the builder.
|
||||
let memtable = PartitionTreeMemtableBuilder::new(
|
||||
PartitionTreeConfig {
|
||||
|
||||
@@ -28,17 +28,19 @@ use datatypes::arrow::array::{
|
||||
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
|
||||
};
|
||||
use datatypes::arrow::buffer::{BooleanBuffer, MutableBuffer};
|
||||
use datatypes::arrow::datatypes::{DataType, Int64Type};
|
||||
use datatypes::arrow::datatypes::{DataType, Int64Type, SchemaRef};
|
||||
use mito_codec::key_values::KeyValue;
|
||||
use mito_codec::row_converter::{build_primary_key_codec, PrimaryKeyCodec};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{InvalidRequestSnafu, Result};
|
||||
use crate::memtable::bulk::part::BulkPart;
|
||||
use crate::memtable::bulk::part::{BulkPart, BulkPartConverter};
|
||||
use crate::memtable::version::SmallMemtableVec;
|
||||
use crate::memtable::{KeyValues, MemtableBuilderRef, MemtableId, MemtableRef};
|
||||
use crate::sst::{to_flat_sst_arrow_schema, FlatSchemaOptions};
|
||||
|
||||
/// Initial time window if not specified.
|
||||
const INITIAL_TIME_WINDOW: Duration = Duration::from_days(1);
|
||||
@@ -208,6 +210,12 @@ pub struct TimePartitions {
|
||||
metadata: RegionMetadataRef,
|
||||
/// Builder of memtables.
|
||||
builder: MemtableBuilderRef,
|
||||
/// Primary key encoder.
|
||||
primary_key_codec: Arc<dyn PrimaryKeyCodec>,
|
||||
|
||||
/// Cached schema for bulk insert.
|
||||
/// This field is Some if the memtable uses bulk insert.
|
||||
bulk_schema: Option<SchemaRef>,
|
||||
}
|
||||
|
||||
pub type TimePartitionsRef = Arc<TimePartitions>;
|
||||
@@ -221,11 +229,19 @@ impl TimePartitions {
|
||||
part_duration: Option<Duration>,
|
||||
) -> Self {
|
||||
let inner = PartitionsInner::new(next_memtable_id);
|
||||
let primary_key_codec = build_primary_key_codec(&metadata);
|
||||
let bulk_schema = builder.use_bulk_insert(&metadata).then(|| {
|
||||
let opts = FlatSchemaOptions::from_encoding(metadata.primary_key_encoding);
|
||||
to_flat_sst_arrow_schema(&metadata, &opts)
|
||||
});
|
||||
|
||||
Self {
|
||||
inner: Mutex::new(inner),
|
||||
part_duration: part_duration.unwrap_or(INITIAL_TIME_WINDOW),
|
||||
metadata,
|
||||
builder,
|
||||
primary_key_codec,
|
||||
bulk_schema,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -233,6 +249,21 @@ impl TimePartitions {
|
||||
///
|
||||
/// It creates new partitions if necessary.
|
||||
pub fn write(&self, kvs: &KeyValues) -> Result<()> {
|
||||
if let Some(bulk_schema) = &self.bulk_schema {
|
||||
let mut converter = BulkPartConverter::new(
|
||||
&self.metadata,
|
||||
bulk_schema.clone(),
|
||||
kvs.num_rows(),
|
||||
self.primary_key_codec.clone(),
|
||||
// Always store primary keys for bulk mode.
|
||||
true,
|
||||
);
|
||||
converter.append_key_values(kvs)?;
|
||||
let part = converter.convert()?;
|
||||
|
||||
return self.write_bulk(part);
|
||||
}
|
||||
|
||||
// Get all parts.
|
||||
let parts = self.list_partitions();
|
||||
|
||||
@@ -413,6 +444,8 @@ impl TimePartitions {
|
||||
part_duration,
|
||||
metadata: metadata.clone(),
|
||||
builder: self.builder.clone(),
|
||||
primary_key_codec: self.primary_key_codec.clone(),
|
||||
bulk_schema: self.bulk_schema.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -112,6 +112,12 @@ impl MemtableBuilder for TimeSeriesMemtableBuilder {
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn use_bulk_insert(&self, _metadata: &RegionMetadataRef) -> bool {
|
||||
// If the simple bulk memtable can be used, the input request is already
// a bulk write request, so this method won't be called.
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Memtable implementation that groups rows by their primary key.
|
||||
@@ -828,7 +834,7 @@ impl Series {
|
||||
}
|
||||
|
||||
/// `ValueBuilder` holds all the vector builders for field columns.
|
||||
struct ValueBuilder {
|
||||
pub(crate) struct ValueBuilder {
|
||||
timestamp: Vec<i64>,
|
||||
timestamp_type: ConcreteDataType,
|
||||
sequence: Vec<u64>,
|
||||
@@ -872,7 +878,7 @@ impl ValueBuilder {
|
||||
/// Returns the size of field values.
|
||||
///
|
||||
/// In this method, we don't check the data type of the value, because it is already checked in the caller.
|
||||
fn push<'a>(
|
||||
pub(crate) fn push<'a>(
|
||||
&mut self,
|
||||
ts: ValueRef,
|
||||
sequence: u64,
|
||||
@@ -1103,10 +1109,10 @@ impl ValueBuilder {
|
||||
/// [Values] holds an immutable vectors of field columns, including `sequence` and `op_type`.
|
||||
#[derive(Clone)]
|
||||
pub struct Values {
|
||||
timestamp: VectorRef,
|
||||
sequence: Arc<UInt64Vector>,
|
||||
op_type: Arc<UInt8Vector>,
|
||||
fields: Vec<VectorRef>,
|
||||
pub(crate) timestamp: VectorRef,
|
||||
pub(crate) sequence: Arc<UInt64Vector>,
|
||||
pub(crate) op_type: Arc<UInt8Vector>,
|
||||
pub(crate) fields: Vec<VectorRef>,
|
||||
}
|
||||
|
||||
impl Values {
|
||||
|
||||
@@ -119,6 +119,14 @@ lazy_static! {
|
||||
|
||||
// Compaction metrics
|
||||
/// Timer of different stages in compaction.
|
||||
/// - pick
|
||||
/// - merge (in parallel)
|
||||
/// - iter_source
|
||||
/// - write_batch
|
||||
/// - update_index
|
||||
/// - upload_parquet
|
||||
/// - upload_puffin
|
||||
/// - write_manifest
|
||||
pub static ref COMPACTION_STAGE_ELAPSED: HistogramVec = register_histogram_vec!(
|
||||
"greptime_mito_compaction_stage_elapsed",
|
||||
"mito compaction stage elapsed",
|
||||
|
||||
@@ -21,6 +21,7 @@ use common_base::readable_size::ReadableSize;
|
||||
use datatypes::arrow::datatypes::{
|
||||
DataType as ArrowDataType, Field, FieldRef, Fields, Schema, SchemaRef,
|
||||
};
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::storage::consts::{
|
||||
OP_TYPE_COLUMN_NAME, PRIMARY_KEY_COLUMN_NAME, SEQUENCE_COLUMN_NAME,
|
||||
@@ -63,6 +64,101 @@ pub fn to_sst_arrow_schema(metadata: &RegionMetadata) -> SchemaRef {
|
||||
Arc::new(Schema::new(fields))
|
||||
}
|
||||
|
||||
/// Options of flat schema.
|
||||
pub struct FlatSchemaOptions {
|
||||
/// Whether to additionally store raw primary key columns alongside the encoded primary key column.
|
||||
pub raw_pk_columns: bool,
|
||||
/// Whether to use dictionary encoding for string primary key columns
|
||||
/// when storing primary key columns.
|
||||
/// Only takes effect when `raw_pk_columns` is true.
|
||||
pub string_pk_use_dict: bool,
|
||||
}
|
||||
|
||||
impl Default for FlatSchemaOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
raw_pk_columns: true,
|
||||
string_pk_use_dict: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FlatSchemaOptions {
|
||||
/// Creates options according to the primary key encoding.
|
||||
pub fn from_encoding(encoding: PrimaryKeyEncoding) -> Self {
|
||||
if encoding == PrimaryKeyEncoding::Dense {
|
||||
Self::default()
|
||||
} else {
|
||||
Self {
|
||||
raw_pk_columns: false,
|
||||
string_pk_use_dict: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the arrow schema to store in parquet.
|
||||
///
|
||||
/// The schema is:
|
||||
/// ```text
|
||||
/// primary key columns, field columns, time index, __primary_key, __sequence, __op_type
|
||||
/// ```
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the metadata is invalid.
|
||||
pub fn to_flat_sst_arrow_schema(
|
||||
metadata: &RegionMetadata,
|
||||
options: &FlatSchemaOptions,
|
||||
) -> SchemaRef {
|
||||
let num_fields = if options.raw_pk_columns {
|
||||
metadata.column_metadatas.len() + 3
|
||||
} else {
|
||||
metadata.column_metadatas.len() + 3 - metadata.primary_key.len()
|
||||
};
|
||||
let mut fields = Vec::with_capacity(num_fields);
|
||||
let schema = metadata.schema.arrow_schema();
|
||||
if options.raw_pk_columns {
|
||||
for pk_id in &metadata.primary_key {
|
||||
let pk_index = metadata.column_index_by_id(*pk_id).unwrap();
|
||||
if options.string_pk_use_dict
|
||||
&& metadata.column_metadatas[pk_index]
|
||||
.column_schema
|
||||
.data_type
|
||||
.is_string()
|
||||
{
|
||||
let field = &schema.fields[pk_index];
|
||||
let field = Arc::new(Field::new_dictionary(
|
||||
field.name(),
|
||||
datatypes::arrow::datatypes::DataType::UInt32,
|
||||
field.data_type().clone(),
|
||||
field.is_nullable(),
|
||||
));
|
||||
fields.push(field);
|
||||
} else {
|
||||
fields.push(schema.fields[pk_index].clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
let remaining_fields = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.zip(&metadata.column_metadatas)
|
||||
.filter_map(|(field, column_meta)| {
|
||||
if column_meta.semantic_type == SemanticType::Field {
|
||||
Some(field.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.chain([metadata.time_index_field()])
|
||||
.chain(internal_fields());
|
||||
for field in remaining_fields {
|
||||
fields.push(field);
|
||||
}
|
||||
|
||||
Arc::new(Schema::new(fields))
|
||||
}
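For dense primary key encoding the options default to storing the raw primary key columns (string keys dictionary-encoded); for sparse encoding only the encoded `__primary_key` column is kept. A brief usage sketch matching the tests earlier in this diff (`metadata` is any `RegionMetadata`):

/// Sketch: the two schema shapes produced for the two encodings.
fn flat_schemas(metadata: &RegionMetadata) -> (SchemaRef, SchemaRef) {
    // Dense: [pk columns..., field columns..., ts, __primary_key, __sequence, __op_type],
    // with string primary key columns dictionary-encoded by default.
    let dense = to_flat_sst_arrow_schema(
        metadata,
        &FlatSchemaOptions::from_encoding(PrimaryKeyEncoding::Dense),
    );
    // Sparse (or raw_pk_columns = false): the raw primary key columns are omitted,
    // leaving [field columns..., ts, __primary_key, __sequence, __op_type].
    let sparse = to_flat_sst_arrow_schema(
        metadata,
        &FlatSchemaOptions {
            raw_pk_columns: false,
            string_pk_use_dict: false,
        },
    );
    (dense, sparse)
}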
/// Fields for internal columns.
|
||||
fn internal_fields() -> [FieldRef; 3] {
|
||||
// Internal columns are always not null.
|
||||
|
||||
@@ -106,7 +106,9 @@ mod tests {
|
||||
use tokio_util::compat::FuturesAsyncWriteCompatExt;
|
||||
|
||||
use super::*;
|
||||
use crate::access_layer::{FilePathProvider, OperationType, RegionFilePathFactory};
|
||||
use crate::access_layer::{
|
||||
FilePathProvider, Metrics, OperationType, RegionFilePathFactory, WriteType,
|
||||
};
|
||||
use crate::cache::{CacheManager, CacheStrategy, PageKey};
|
||||
use crate::read::{BatchBuilder, BatchReader};
|
||||
use crate::region::options::{IndexOptions, InvertedIndexOptions};
|
||||
@@ -177,6 +179,7 @@ mod tests {
|
||||
metadata.clone(),
|
||||
NoopIndexBuilder,
|
||||
file_path,
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -239,6 +242,7 @@ mod tests {
|
||||
FixedPathProvider {
|
||||
region_file_id: handle.file_id(),
|
||||
},
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -318,6 +322,7 @@ mod tests {
|
||||
FixedPathProvider {
|
||||
region_file_id: handle.file_id(),
|
||||
},
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -365,6 +370,7 @@ mod tests {
|
||||
FixedPathProvider {
|
||||
region_file_id: handle.file_id(),
|
||||
},
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
writer
|
||||
@@ -422,6 +428,7 @@ mod tests {
|
||||
FixedPathProvider {
|
||||
region_file_id: handle.file_id(),
|
||||
},
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
writer
|
||||
@@ -464,6 +471,7 @@ mod tests {
|
||||
FixedPathProvider {
|
||||
region_file_id: handle.file_id(),
|
||||
},
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -617,6 +625,7 @@ mod tests {
|
||||
metadata.clone(),
|
||||
NoopIndexBuilder,
|
||||
path_provider,
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -692,6 +701,7 @@ mod tests {
|
||||
metadata.clone(),
|
||||
indexer_builder,
|
||||
file_path.clone(),
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -979,6 +989,7 @@ mod tests {
|
||||
metadata.clone(),
|
||||
NoopIndexBuilder,
|
||||
file_path,
|
||||
Metrics::new(WriteType::Flush),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Instant;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use common_time::Timestamp;
|
||||
@@ -38,7 +39,7 @@ use store_api::storage::SequenceNumber;
|
||||
use tokio::io::AsyncWrite;
|
||||
use tokio_util::compat::{Compat, FuturesAsyncWriteCompatExt};
|
||||
|
||||
use crate::access_layer::{FilePathProvider, SstInfoArray, TempFileCleaner};
|
||||
use crate::access_layer::{FilePathProvider, Metrics, SstInfoArray, TempFileCleaner};
|
||||
use crate::error::{InvalidMetadataSnafu, OpenDalSnafu, Result, WriteParquetSnafu};
|
||||
use crate::read::{Batch, Source};
|
||||
use crate::sst::file::{FileId, RegionFileId};
|
||||
@@ -65,6 +66,8 @@ pub struct ParquetWriter<F: WriterFactory, I: IndexerBuilder, P: FilePathProvide
|
||||
bytes_written: Arc<AtomicUsize>,
|
||||
/// Cleaner to remove temp files on failure.
|
||||
file_cleaner: Option<TempFileCleaner>,
|
||||
/// Write metrics
|
||||
metrics: Metrics,
|
||||
}
|
||||
|
||||
pub trait WriterFactory {
|
||||
@@ -100,12 +103,14 @@ where
|
||||
metadata: RegionMetadataRef,
|
||||
indexer_builder: I,
|
||||
path_provider: P,
|
||||
metrics: Metrics,
|
||||
) -> ParquetWriter<ObjectStoreWriterFactory, I, P> {
|
||||
ParquetWriter::new(
|
||||
ObjectStoreWriterFactory { object_store },
|
||||
metadata,
|
||||
indexer_builder,
|
||||
path_provider,
|
||||
metrics,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -128,6 +133,7 @@ where
|
||||
metadata: RegionMetadataRef,
|
||||
indexer_builder: I,
|
||||
path_provider: P,
|
||||
metrics: Metrics,
|
||||
) -> ParquetWriter<F, I, P> {
|
||||
let init_file = FileId::random();
|
||||
let indexer = indexer_builder.build(init_file).await;
|
||||
@@ -142,6 +148,7 @@ where
|
||||
current_indexer: Some(indexer),
|
||||
bytes_written: Arc::new(AtomicUsize::new(0)),
|
||||
file_cleaner: None,
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -234,12 +241,14 @@ where
|
||||
match res {
|
||||
Ok(mut batch) => {
|
||||
stats.update(&batch);
|
||||
let start = Instant::now();
|
||||
// safety: self.current_indexer must be set when first batch has been written.
|
||||
self.current_indexer
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.update(&mut batch)
|
||||
.await;
|
||||
self.metrics.update_index += start.elapsed();
|
||||
if let Some(max_file_size) = opts.max_file_size
|
||||
&& self.bytes_written.load(Ordering::Relaxed) > max_file_size
|
||||
{
|
||||
@@ -286,16 +295,21 @@ where
|
||||
write_format: &WriteFormat,
|
||||
opts: &WriteOptions,
|
||||
) -> Result<Option<Batch>> {
|
||||
let start = Instant::now();
|
||||
let Some(batch) = source.next_batch().await? else {
|
||||
return Ok(None);
|
||||
};
|
||||
self.metrics.iter_source += start.elapsed();
|
||||
|
||||
let arrow_batch = write_format.convert_batch(&batch)?;
|
||||
|
||||
let start = Instant::now();
|
||||
self.maybe_init_writer(write_format.arrow_schema(), opts)
|
||||
.await?
|
||||
.write(&arrow_batch)
|
||||
.await
|
||||
.context(WriteParquetSnafu)?;
|
||||
self.metrics.write_batch += start.elapsed();
|
||||
Ok(Some(batch))
|
||||
}
|
||||
|
||||
@@ -340,6 +354,11 @@ where
|
||||
Ok(self.writer.as_mut().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes the writer and returns the collected metrics.
|
||||
pub fn into_metrics(self) -> Metrics {
|
||||
self.metrics
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -27,7 +27,9 @@ use datatypes::schema::ColumnSchema;
|
||||
use datatypes::vectors::TimestampMillisecondVector;
|
||||
use mito_codec::key_values::KeyValue;
|
||||
use mito_codec::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt, SortField};
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder, RegionMetadataRef};
|
||||
use store_api::metadata::{
|
||||
ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
|
||||
};
|
||||
use store_api::storage::{ColumnId, RegionId, SequenceNumber};
|
||||
use table::predicate::Predicate;
|
||||
|
||||
@@ -126,13 +128,17 @@ impl MemtableBuilder for EmptyMemtableBuilder {
|
||||
fn build(&self, id: MemtableId, _metadata: &RegionMetadataRef) -> MemtableRef {
|
||||
Arc::new(EmptyMemtable::new(id))
|
||||
}
|
||||
|
||||
fn use_bulk_insert(&self, _metadata: &RegionMetadataRef) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a region metadata to test memtable with default pk.
|
||||
///
|
||||
/// The schema is `k0, k1, ts, v0, v1` and pk is `k0, k1`.
|
||||
pub(crate) fn metadata_for_test() -> RegionMetadataRef {
|
||||
metadata_with_primary_key(vec![0, 1], false)
|
||||
Arc::new(metadata_with_primary_key(vec![0, 1], false))
|
||||
}
|
||||
|
||||
/// Creates a region metadata to test memtables with a specific primary key.
|
||||
@@ -142,7 +148,7 @@ pub(crate) fn metadata_for_test() -> RegionMetadataRef {
|
||||
pub fn metadata_with_primary_key(
|
||||
primary_key: Vec<ColumnId>,
|
||||
enable_table_id: bool,
|
||||
) -> RegionMetadataRef {
|
||||
) -> RegionMetadata {
|
||||
let mut builder = RegionMetadataBuilder::new(RegionId::new(123, 456));
|
||||
let maybe_table_id = if enable_table_id { "__table_id" } else { "k1" };
|
||||
builder
|
||||
@@ -180,8 +186,7 @@ pub fn metadata_with_primary_key(
|
||||
column_id: 4,
|
||||
})
|
||||
.primary_key(primary_key);
|
||||
let region_metadata = builder.build().unwrap();
|
||||
Arc::new(region_metadata)
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
fn semantic_type_of_column(column_id: ColumnId, primary_key: &[ColumnId]) -> SemanticType {
|
||||
|
||||
@@ -33,7 +33,7 @@ use crate::error::Result;
|
||||
use crate::flush::FlushScheduler;
|
||||
use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
|
||||
use crate::region::{ManifestContext, ManifestContextRef, RegionLeaderState, RegionRoleState};
|
||||
use crate::request::{WorkerRequest, WorkerRequestWithTime};
|
||||
use crate::request::WorkerRequestWithTime;
|
||||
use crate::schedule::scheduler::{Job, LocalScheduler, Scheduler, SchedulerRef};
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::puffin_manager::PuffinManagerFactory;
|
||||
|
||||
@@ -16,7 +16,7 @@ use async_trait::async_trait;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_function::handlers::ProcedureServiceHandler;
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutorRef};
|
||||
use common_meta::procedure_executor::{ExecutorContext, ProcedureExecutorRef};
|
||||
use common_meta::rpc::procedure::{
|
||||
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
|
||||
RemoveRegionFollowerRequest,
|
||||
|
||||
@@ -36,12 +36,12 @@ use client::RecordBatches;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::TableRouteCacheRef;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::ddl::ProcedureExecutorRef;
|
||||
use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::view_info::{ViewInfoManager, ViewInfoManagerRef};
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::procedure_executor::ProcedureExecutorRef;
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing;
|
||||
use common_time::range::TimestampRange;
|
||||
|
||||
@@ -31,10 +31,10 @@ use common_catalog::{format_full_flow_name, format_full_table_name};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::Context;
|
||||
use common_meta::ddl::create_flow::FlowType;
|
||||
use common_meta::ddl::ExecutorContext;
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::schema_name::{SchemaName, SchemaNameKey};
|
||||
use common_meta::key::NAME_PATTERN;
|
||||
use common_meta::procedure_executor::ExecutorContext;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use common_meta::rpc::ddl::trigger::CreateTriggerTask;
|
||||
#[cfg(feature = "enterprise")]
|
||||
|
||||
@@ -272,6 +272,37 @@ impl DatafusionQueryEngine {
|
||||
ctx: &mut QueryEngineContext,
|
||||
logical_plan: &LogicalPlan,
|
||||
) -> Result<Arc<dyn ExecutionPlan>> {
|
||||
/// Only print context on panic, to avoid cluttering logs.
|
||||
///
|
||||
/// TODO(discord9): remove this once we catch the bug
|
||||
#[derive(Debug)]
|
||||
struct PanicLogger<'a> {
|
||||
input_logical_plan: &'a LogicalPlan,
|
||||
after_analyze: Option<LogicalPlan>,
|
||||
after_optimize: Option<LogicalPlan>,
|
||||
phy_plan: Option<Arc<dyn ExecutionPlan>>,
|
||||
}
|
||||
impl Drop for PanicLogger<'_> {
|
||||
fn drop(&mut self) {
|
||||
if std::thread::panicking() {
|
||||
common_telemetry::error!(
|
||||
"Panic while creating physical plan, input logical plan: {:?}, after analyze: {:?}, after optimize: {:?}, final physical plan: {:?}",
|
||||
self.input_logical_plan,
|
||||
self.after_analyze,
|
||||
self.after_optimize,
|
||||
self.phy_plan
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut logger = PanicLogger {
|
||||
input_logical_plan: logical_plan,
|
||||
after_analyze: None,
|
||||
after_optimize: None,
|
||||
phy_plan: None,
|
||||
};
|
||||
|
||||
let _timer = metrics::CREATE_PHYSICAL_ELAPSED.start_timer();
|
||||
let state = ctx.state();
|
||||
|
||||
@@ -295,6 +326,8 @@ impl DatafusionQueryEngine {
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?;
|
||||
|
||||
logger.after_analyze = Some(analyzed_plan.clone());
|
||||
|
||||
common_telemetry::debug!("Create physical plan, analyzed plan: {analyzed_plan}");
|
||||
|
||||
// skip optimize for MergeScan
|
||||
@@ -312,12 +345,15 @@ impl DatafusionQueryEngine {
|
||||
};
|
||||
|
||||
common_telemetry::debug!("Create physical plan, optimized plan: {optimized_plan}");
|
||||
logger.after_optimize = Some(optimized_plan.clone());
|
||||
|
||||
let physical_plan = state
|
||||
.query_planner()
|
||||
.create_physical_plan(&optimized_plan, state)
|
||||
.await?;
|
||||
|
||||
logger.phy_plan = Some(physical_plan.clone());
|
||||
drop(logger);
|
||||
Ok(physical_plan)
|
||||
}
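
PanicLogger above is a Drop guard: it stores the plan at each stage and prints everything only if the thread is unwinding when it is dropped. A standalone sketch of the same pattern, with the plan types replaced by plain strings:

// A Drop guard that only emits its context if the thread is unwinding from a panic.
struct PanicLogger<'a> {
    input: &'a str,
    after_optimize: Option<String>,
}

impl Drop for PanicLogger<'_> {
    fn drop(&mut self) {
        if std::thread::panicking() {
            eprintln!(
                "panic while planning; input: {}, after optimize: {:?}",
                self.input, self.after_optimize
            );
        }
    }
}

fn plan(query: &str) -> String {
    let mut logger = PanicLogger { input: query, after_optimize: None };
    let optimized = format!("optimized({query})");
    logger.after_optimize = Some(optimized.clone());
    if query.contains("boom") {
        panic!("planner bug"); // the guard prints its context while unwinding
    }
    drop(logger); // on success the guard stays silent
    optimized
}

fn main() {
    println!("{}", plan("select 1"));
    let _ = std::panic::catch_unwind(|| plan("boom"));
}

Because the guard is explicitly dropped on the success path, the happy path adds nothing to the logs, which is the point of the TODO note in the hunk.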
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
v0.10.5
|
||||
v0.10.6
|
||||
|
||||
@@ -122,7 +122,7 @@ impl PrometheusGatewayService {
|
||||
let result = self.handler.do_query(&query, ctx).await;
|
||||
let (metric_name, mut result_type) =
|
||||
match retrieve_metric_name_and_result_type(&query.query) {
|
||||
Ok((metric_name, result_type)) => (metric_name.unwrap_or_default(), result_type),
|
||||
Ok((metric_name, result_type)) => (metric_name, result_type),
|
||||
Err(err) => {
|
||||
return PrometheusJsonResponse::error(err.status_code(), err.output_msg())
|
||||
}
|
||||
|
||||
@@ -887,7 +887,19 @@ impl HttpServer {
|
||||
"/prof",
|
||||
Router::new()
|
||||
.route("/cpu", routing::post(pprof::pprof_handler))
|
||||
.route("/mem", routing::post(mem_prof::mem_prof_handler)),
|
||||
.route("/mem", routing::post(mem_prof::mem_prof_handler))
|
||||
.route(
|
||||
"/mem/activate",
|
||||
routing::post(mem_prof::activate_heap_prof_handler),
|
||||
)
|
||||
.route(
|
||||
"/mem/deactivate",
|
||||
routing::post(mem_prof::deactivate_heap_prof_handler),
|
||||
)
|
||||
.route(
|
||||
"/mem/status",
|
||||
routing::get(mem_prof::heap_prof_status_handler),
|
||||
),
|
||||
),
|
||||
))
|
||||
}
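
The routes above add the heap-profiling activate/deactivate/status endpoints under the existing profiling router. A minimal sketch of equivalent nesting, assuming an axum 0.7-style API and tokio; the handler bodies here are placeholders, not the real mem_prof handlers:

use axum::{
    routing::{get, post},
    Router,
};

async fn activate() -> &'static str {
    "Heap profiling activated"
}

async fn deactivate() -> &'static str {
    "Heap profiling deactivated"
}

async fn status() -> &'static str {
    r#"{"active": false}"#
}

fn prof_router() -> Router {
    Router::new()
        .route("/mem/activate", post(activate))
        .route("/mem/deactivate", post(deactivate))
        .route("/mem/status", get(status))
}

#[tokio::main]
async fn main() {
    // Nest the profiling routes under a common prefix, as the hunk does with "/prof".
    let app = Router::new().nest("/prof", prof_router());
    let listener = tokio::net::TcpListener::bind("127.0.0.1:3000").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}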
|
||||
|
||||
@@ -56,6 +56,30 @@ pub async fn mem_prof_handler(
|
||||
Ok((StatusCode::OK, dump))
|
||||
}
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn activate_heap_prof_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::DumpProfileDataSnafu;
|
||||
|
||||
common_mem_prof::activate_heap_profile().context(DumpProfileDataSnafu)?;
|
||||
|
||||
Ok((StatusCode::OK, "Heap profiling activated"))
|
||||
}
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn deactivate_heap_prof_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::DumpProfileDataSnafu;
|
||||
|
||||
common_mem_prof::deactivate_heap_profile().context(DumpProfileDataSnafu)?;
|
||||
|
||||
Ok((StatusCode::OK, "Heap profiling deactivated"))
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "mem-prof"))]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn mem_prof_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
@@ -64,3 +88,42 @@ pub async fn mem_prof_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
"The 'mem-prof' feature is disabled",
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "mem-prof"))]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn activate_heap_prof_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
Ok((
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
"The 'mem-prof' feature is disabled",
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn heap_prof_status_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::DumpProfileDataSnafu;
|
||||
|
||||
let is_active = common_mem_prof::is_heap_profile_active().context(DumpProfileDataSnafu)?;
|
||||
|
||||
Ok((StatusCode::OK, format!("{{\"active\": {}}}", is_active)))
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "mem-prof"))]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn deactivate_heap_prof_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
Ok((
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
"The 'mem-prof' feature is disabled",
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "mem-prof"))]
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn heap_prof_status_handler() -> crate::error::Result<impl IntoResponse> {
|
||||
Ok((
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
"The 'mem-prof' feature is disabled",
|
||||
))
|
||||
}
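
Each handler above exists in two flavors, a real one behind the mem-prof feature and a NOT_IMPLEMENTED stub without it, so the router can always reference the same symbol. A compact sketch of that cfg-gating pattern (the feature name and return values below are illustrative):

// With the feature enabled, do the real work ...
#[cfg(feature = "heap-prof")]
fn heap_prof_status() -> Result<String, String> {
    Ok(r#"{"active": true}"#.to_string())
}

// ... otherwise keep the same signature and return a stub.
#[cfg(not(feature = "heap-prof"))]
fn heap_prof_status() -> Result<String, String> {
    Err("the 'heap-prof' feature is disabled".to_string())
}

fn main() {
    // Callers compile against one signature regardless of the feature set.
    println!("{:?}", heap_prof_status());
}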
|
||||
|
||||
@@ -33,7 +33,7 @@ use opentelemetry_proto::tonic::collector::trace::v1::{
|
||||
use pipeline::PipelineWay;
|
||||
use prost::Message;
|
||||
use session::context::{Channel, QueryContext};
|
||||
use session::protocol_ctx::{OtlpMetricCtx, ProtocolCtx};
|
||||
use session::protocol_ctx::{MetricType, OtlpMetricCtx, ProtocolCtx};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::error::{self, PipelineSnafu, Result};
|
||||
@@ -80,6 +80,7 @@ pub async fn metrics(
|
||||
with_metric_engine,
|
||||
// set is_legacy later
|
||||
is_legacy: false,
|
||||
metric_type: MetricType::Init,
|
||||
}));
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ use crate::http::extractor::PipelineInfo;
|
||||
use crate::http::header::{write_cost_header_map, GREPTIME_DB_HEADER_METRICS};
|
||||
use crate::http::PromValidationMode;
|
||||
use crate::prom_row_builder::TablesBuilder;
|
||||
use crate::prom_store::{snappy_decompress, zstd_decompress};
|
||||
use crate::prom_store::{extract_schema_from_read_request, snappy_decompress, zstd_decompress};
|
||||
use crate::proto::{PromSeriesProcessor, PromWriteRequest};
|
||||
use crate::query_handler::{PipelineHandlerRef, PromStoreProtocolHandlerRef, PromStoreResponse};
|
||||
|
||||
@@ -118,6 +118,7 @@ pub async fn remote_write(
|
||||
let is_zstd = content_encoding.contains(VM_ENCODING);
|
||||
|
||||
let mut processor = PromSeriesProcessor::default_processor();
|
||||
|
||||
if let Some(pipeline_name) = pipeline_info.pipeline_name {
|
||||
let pipeline_def = PipelineDefinition::from_name(
|
||||
&pipeline_name,
|
||||
@@ -192,13 +193,19 @@ pub async fn remote_read(
|
||||
) -> Result<PromStoreResponse> {
|
||||
let db = params.db.clone().unwrap_or_default();
|
||||
query_ctx.set_channel(Channel::Prometheus);
|
||||
|
||||
let request = decode_remote_read_request(body).await?;
|
||||
|
||||
// Extract schema from special labels and set it in query context
|
||||
if let Some(schema) = extract_schema_from_read_request(&request) {
|
||||
query_ctx.set_current_schema(&schema);
|
||||
}
|
||||
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
let _timer = crate::metrics::METRIC_HTTP_PROM_STORE_READ_ELAPSED
|
||||
.with_label_values(&[db.as_str()])
|
||||
.start_timer();
|
||||
|
||||
let request = decode_remote_read_request(body).await?;
|
||||
|
||||
state.prom_store_handler.read(request, query_ctx).await
|
||||
}
|
||||
|
||||
|
||||
@@ -318,7 +318,7 @@ async fn do_instant_query(
|
||||
) -> PrometheusJsonResponse {
|
||||
let result = handler.do_query(prom_query, query_ctx).await;
|
||||
let (metric_name, result_type) = match retrieve_metric_name_and_result_type(&prom_query.query) {
|
||||
Ok((metric_name, result_type)) => (metric_name.unwrap_or_default(), result_type),
|
||||
Ok((metric_name, result_type)) => (metric_name, result_type),
|
||||
Err(err) => return PrometheusJsonResponse::error(err.status_code(), err.output_msg()),
|
||||
};
|
||||
PrometheusJsonResponse::from_query_result(result, metric_name, result_type).await
|
||||
@@ -428,7 +428,7 @@ async fn do_range_query(
|
||||
let result = handler.do_query(prom_query, query_ctx).await;
|
||||
let metric_name = match retrieve_metric_name_and_result_type(&prom_query.query) {
|
||||
Err(err) => return PrometheusJsonResponse::error(err.status_code(), err.output_msg()),
|
||||
Ok((metric_name, _)) => metric_name.unwrap_or_default(),
|
||||
Ok((metric_name, _)) => metric_name,
|
||||
};
|
||||
PrometheusJsonResponse::from_query_result(result, metric_name, ValueType::Matrix).await
|
||||
}
|
||||
@@ -824,13 +824,52 @@ pub(crate) fn try_update_catalog_schema(ctx: &mut QueryContext, catalog: &str, s
|
||||
}
|
||||
|
||||
fn promql_expr_to_metric_name(expr: &PromqlExpr) -> Option<String> {
|
||||
find_metric_name_and_matchers(expr, |name, matchers| {
|
||||
name.clone().or(matchers
|
||||
.find_matchers(METRIC_NAME)
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|m| m.value))
|
||||
})
|
||||
let mut metric_names = HashSet::new();
|
||||
collect_metric_names(expr, &mut metric_names);
|
||||
|
||||
// Return the metric name only if there's exactly one unique metric name
|
||||
if metric_names.len() == 1 {
|
||||
metric_names.into_iter().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively collect all metric names from a PromQL expression
|
||||
fn collect_metric_names(expr: &PromqlExpr, metric_names: &mut HashSet<String>) {
|
||||
match expr {
|
||||
PromqlExpr::Aggregate(AggregateExpr { expr, .. }) => {
|
||||
collect_metric_names(expr, metric_names)
|
||||
}
|
||||
PromqlExpr::Unary(UnaryExpr { expr }) => collect_metric_names(expr, metric_names),
|
||||
PromqlExpr::Binary(BinaryExpr { lhs, rhs, .. }) => {
|
||||
collect_metric_names(lhs, metric_names);
|
||||
collect_metric_names(rhs, metric_names);
|
||||
}
|
||||
PromqlExpr::Paren(ParenExpr { expr }) => collect_metric_names(expr, metric_names),
|
||||
PromqlExpr::Subquery(SubqueryExpr { expr, .. }) => collect_metric_names(expr, metric_names),
|
||||
PromqlExpr::VectorSelector(VectorSelector { name, matchers, .. }) => {
|
||||
if let Some(name) = name {
|
||||
metric_names.insert(name.clone());
|
||||
} else if let Some(matcher) = matchers.find_matchers(METRIC_NAME).into_iter().next() {
|
||||
metric_names.insert(matcher.value);
|
||||
}
|
||||
}
|
||||
PromqlExpr::MatrixSelector(MatrixSelector { vs, .. }) => {
|
||||
let VectorSelector { name, matchers, .. } = vs;
|
||||
if let Some(name) = name {
|
||||
metric_names.insert(name.clone());
|
||||
} else if let Some(matcher) = matchers.find_matchers(METRIC_NAME).into_iter().next() {
|
||||
metric_names.insert(matcher.value);
|
||||
}
|
||||
}
|
||||
PromqlExpr::Call(Call { args, .. }) => {
|
||||
args.args
|
||||
.iter()
|
||||
.for_each(|e| collect_metric_names(e, metric_names));
|
||||
}
|
||||
PromqlExpr::NumberLiteral(_) | PromqlExpr::StringLiteral(_) | PromqlExpr::Extension(_) => {}
|
||||
}
|
||||
}
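
The rewrite above collects every referenced metric name into a HashSet while walking the expression tree and only reports a name when exactly one distinct metric appears. A self-contained sketch of that rule over a toy expression enum (the real code walks promql_parser's AST):

use std::collections::HashSet;

// Toy stand-in for a PromQL expression tree.
enum Expr {
    Metric(&'static str),
    Binary(Box<Expr>, Box<Expr>),
    Func(Vec<Expr>),
}

fn collect(expr: &Expr, names: &mut HashSet<String>) {
    match expr {
        Expr::Metric(name) => {
            names.insert(name.to_string());
        }
        Expr::Binary(lhs, rhs) => {
            collect(lhs, names);
            collect(rhs, names);
        }
        Expr::Func(args) => {
            for arg in args {
                collect(arg, names);
            }
        }
    }
}

// Same rule as promql_expr_to_metric_name: a name is only returned when it is unique.
fn single_metric_name(expr: &Expr) -> Option<String> {
    let mut names = HashSet::new();
    collect(expr, &mut names);
    if names.len() == 1 {
        names.into_iter().next()
    } else {
        None
    }
}

fn main() {
    let same = Expr::Binary(Box::new(Expr::Metric("cpu")), Box::new(Expr::Metric("cpu")));
    let mixed = Expr::Func(vec![Expr::Metric("cpu"), Expr::Metric("mem")]);
    assert_eq!(single_metric_name(&same), Some("cpu".to_string()));
    assert_eq!(single_metric_name(&mixed), None);
    println!("ok");
}

This is why "cpu_usage + cpu_usage" still yields a metric name in the tests below while "cpu_usage + memory_usage" does not.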
|
||||
|
||||
fn find_metric_name_and_matchers<E, F>(expr: &PromqlExpr, f: F) -> Option<E>
|
||||
@@ -1114,51 +1153,11 @@ async fn retrieve_field_names(
|
||||
|
||||
/// Try to parse and extract the name of referenced metric from the promql query.
|
||||
///
|
||||
/// Returns the metric name if a single metric is referenced, otherwise None.
|
||||
/// Returns the metric name if exactly one unique metric is referenced, otherwise None.
|
||||
/// Multiple references to the same metric are allowed.
|
||||
fn retrieve_metric_name_from_promql(query: &str) -> Option<String> {
|
||||
let promql_expr = promql_parser::parser::parse(query).ok()?;
|
||||
|
||||
struct MetricNameVisitor {
|
||||
metric_name: Option<String>,
|
||||
}
|
||||
|
||||
impl promql_parser::util::ExprVisitor for MetricNameVisitor {
|
||||
type Error = ();
|
||||
|
||||
fn pre_visit(&mut self, plan: &PromqlExpr) -> std::result::Result<bool, Self::Error> {
|
||||
let query_metric_name = match plan {
|
||||
PromqlExpr::VectorSelector(vs) => vs
|
||||
.matchers
|
||||
.find_matchers(METRIC_NAME)
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|m| m.value)
|
||||
.or_else(|| vs.name.clone()),
|
||||
PromqlExpr::MatrixSelector(ms) => ms
|
||||
.vs
|
||||
.matchers
|
||||
.find_matchers(METRIC_NAME)
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|m| m.value)
|
||||
.or_else(|| ms.vs.name.clone()),
|
||||
_ => return Ok(true),
|
||||
};
|
||||
|
||||
// set it to empty string if multiple metrics are referenced.
|
||||
if self.metric_name.is_some() && query_metric_name.is_some() {
|
||||
self.metric_name = Some(String::new());
|
||||
} else {
|
||||
self.metric_name = query_metric_name.or_else(|| self.metric_name.clone());
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
let mut visitor = MetricNameVisitor { metric_name: None };
|
||||
promql_parser::util::walk_expr(&mut visitor, &promql_expr).ok()?;
|
||||
visitor.metric_name
|
||||
promql_expr_to_metric_name(&promql_expr)
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
@@ -1275,3 +1274,205 @@ pub async fn parse_query(
|
||||
PrometheusJsonResponse::error(StatusCode::InvalidArguments, "query is required")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use promql_parser::parser::value::ValueType;
|
||||
|
||||
use super::*;
|
||||
|
||||
struct TestCase {
|
||||
name: &'static str,
|
||||
promql: &'static str,
|
||||
expected_metric: Option<&'static str>,
|
||||
expected_type: ValueType,
|
||||
should_error: bool,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_retrieve_metric_name_and_result_type() {
|
||||
let test_cases = &[
|
||||
// Single metric cases
|
||||
TestCase {
|
||||
name: "simple metric",
|
||||
promql: "cpu_usage",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "metric with selector",
|
||||
promql: r#"cpu_usage{instance="localhost"}"#,
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "metric with range selector",
|
||||
promql: "cpu_usage[5m]",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Matrix,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "metric with __name__ matcher",
|
||||
promql: r#"{__name__="cpu_usage"}"#,
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "metric with unary operator",
|
||||
promql: "-cpu_usage",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
// Aggregation and function cases
|
||||
TestCase {
|
||||
name: "metric with aggregation",
|
||||
promql: "sum(cpu_usage)",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "complex aggregation",
|
||||
promql: r#"sum by (instance) (cpu_usage{job="node"})"#,
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
// Same metric binary operations
|
||||
TestCase {
|
||||
name: "same metric addition",
|
||||
promql: "cpu_usage + cpu_usage",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "metric with scalar addition",
|
||||
promql: r#"sum(rate(cpu_usage{job="node"}[5m])) by (instance) + 100"#,
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
// Multiple metrics cases
|
||||
TestCase {
|
||||
name: "different metrics addition",
|
||||
promql: "cpu_usage + memory_usage",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "different metrics subtraction",
|
||||
promql: "network_in - network_out",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
// Unless operator cases
|
||||
TestCase {
|
||||
name: "unless with different metrics",
|
||||
promql: "cpu_usage unless memory_usage",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "unless with same metric",
|
||||
promql: "cpu_usage unless cpu_usage",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: false,
|
||||
},
|
||||
// Subquery cases
|
||||
TestCase {
|
||||
name: "basic subquery",
|
||||
promql: "cpu_usage[5m:1m]",
|
||||
expected_metric: Some("cpu_usage"),
|
||||
expected_type: ValueType::Matrix,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "subquery with multiple metrics",
|
||||
promql: "(cpu_usage + memory_usage)[5m:1m]",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Matrix,
|
||||
should_error: false,
|
||||
},
|
||||
// Literal values
|
||||
TestCase {
|
||||
name: "scalar value",
|
||||
promql: "42",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Scalar,
|
||||
should_error: false,
|
||||
},
|
||||
TestCase {
|
||||
name: "string literal",
|
||||
promql: r#""hello world""#,
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::String,
|
||||
should_error: false,
|
||||
},
|
||||
// Error cases
|
||||
TestCase {
|
||||
name: "invalid syntax",
|
||||
promql: "cpu_usage{invalid=",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: true,
|
||||
},
|
||||
TestCase {
|
||||
name: "empty query",
|
||||
promql: "",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: true,
|
||||
},
|
||||
TestCase {
|
||||
name: "malformed brackets",
|
||||
promql: "cpu_usage[5m",
|
||||
expected_metric: None,
|
||||
expected_type: ValueType::Vector,
|
||||
should_error: true,
|
||||
},
|
||||
];
|
||||
|
||||
for test_case in test_cases {
|
||||
let result = retrieve_metric_name_and_result_type(test_case.promql);
|
||||
|
||||
if test_case.should_error {
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Test '{}' should have failed but succeeded with: {:?}",
|
||||
test_case.name,
|
||||
result
|
||||
);
|
||||
} else {
|
||||
let (metric_name, value_type) = result.unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"Test '{}' should have succeeded but failed with error: {}",
|
||||
test_case.name, e
|
||||
)
|
||||
});
|
||||
|
||||
let expected_metric_name = test_case.expected_metric.map(|s| s.to_string());
|
||||
assert_eq!(
|
||||
metric_name, expected_metric_name,
|
||||
"Test '{}': metric name mismatch. Expected: {:?}, Got: {:?}",
|
||||
test_case.name, expected_metric_name, metric_name
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
value_type, test_case.expected_type,
|
||||
"Test '{}': value type mismatch. Expected: {:?}, Got: {:?}",
|
||||
test_case.name, test_case.expected_type, value_type
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,7 +118,7 @@ impl PrometheusJsonResponse {
|
||||
/// Convert from `Result<Output>`
|
||||
pub async fn from_query_result(
|
||||
result: Result<Output>,
|
||||
metric_name: String,
|
||||
metric_name: Option<String>,
|
||||
result_type: ValueType,
|
||||
) -> Self {
|
||||
let response: Result<Self> = try {
|
||||
@@ -182,7 +182,7 @@ impl PrometheusJsonResponse {
|
||||
/// Convert [RecordBatches] to [PromData]
|
||||
fn record_batches_to_data(
|
||||
batches: RecordBatches,
|
||||
metric_name: String,
|
||||
metric_name: Option<String>,
|
||||
result_type: ValueType,
|
||||
) -> Result<PrometheusResponse> {
|
||||
// infer semantic type of each column from schema.
|
||||
@@ -230,7 +230,6 @@ impl PrometheusJsonResponse {
|
||||
reason: "no value column found".to_string(),
|
||||
})?;
|
||||
|
||||
let metric_name = (METRIC_NAME, metric_name.as_str());
|
||||
// Preserves the order of output tags.
|
||||
// Tag order matters, e.g., after sort and sort_desc, the output order must be kept.
|
||||
let mut buffer = IndexMap::<Vec<(&str, &str)>, Vec<(f64, String)>>::new();
|
||||
@@ -276,9 +275,10 @@ impl PrometheusJsonResponse {
|
||||
}
|
||||
|
||||
// retrieve tags
|
||||
// TODO(ruihang): push table name `__metric__`
|
||||
let mut tags = Vec::with_capacity(num_label_columns + 1);
|
||||
tags.push(metric_name);
|
||||
if let Some(metric_name) = &metric_name {
|
||||
tags.push((METRIC_NAME, metric_name.as_str()));
|
||||
}
|
||||
for (tag_column, tag_name) in tag_columns.iter().zip(tag_names.iter()) {
|
||||
// TODO(ruihang): add test for NULL tag
|
||||
if let Some(tag_value) = tag_column.get_data(row_index) {
|
||||
|
||||
@@ -12,16 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use ahash::HashSet;
|
||||
use ahash::{HashMap, HashSet};
|
||||
use api::v1::{RowInsertRequests, Value};
|
||||
use common_grpc::precision::Precision;
|
||||
use common_query::prelude::{GREPTIME_COUNT, GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::{any_value, AnyValue, KeyValue};
|
||||
use opentelemetry_proto::tonic::metrics::v1::{metric, number_data_point, *};
|
||||
use regex::Regex;
|
||||
use session::protocol_ctx::OtlpMetricCtx;
|
||||
use session::protocol_ctx::{MetricType, OtlpMetricCtx};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::otlp::trace::{KEY_SERVICE_INSTANCE_ID, KEY_SERVICE_NAME};
|
||||
@@ -37,6 +38,9 @@ const JOB_KEY: &str = "job";
|
||||
const INSTANCE_KEY: &str = "instance";
|
||||
|
||||
const UNDERSCORE: &str = "_";
|
||||
const DOUBLE_UNDERSCORE: &str = "__";
|
||||
const TOTAL: &str = "total";
|
||||
const RATIO: &str = "ratio";
|
||||
|
||||
// see: https://prometheus.io/docs/guides/opentelemetry/#promoting-resource-attributes
|
||||
const DEFAULT_PROMOTE_ATTRS: [&str; 19] = [
|
||||
@@ -64,7 +68,48 @@ const DEFAULT_PROMOTE_ATTRS: [&str; 19] = [
|
||||
lazy_static! {
|
||||
static ref DEFAULT_PROMOTE_ATTRS_SET: HashSet<String> =
|
||||
HashSet::from_iter(DEFAULT_PROMOTE_ATTRS.iter().map(|s| s.to_string()));
|
||||
static ref INVALID_METRIC_NAME: Regex = Regex::new(r"[^a-zA-Z0-9:_]").unwrap();
|
||||
static ref NON_ALPHA_NUM_CHAR: Regex = Regex::new(r"[^a-zA-Z0-9]").unwrap();
|
||||
static ref UNIT_MAP: HashMap<String, String> = [
|
||||
// Time
|
||||
("d", "days"),
|
||||
("h", "hours"),
|
||||
("min", "minutes"),
|
||||
("s", "seconds"),
|
||||
("ms", "milliseconds"),
|
||||
("us", "microseconds"),
|
||||
("ns", "nanoseconds"),
|
||||
// Bytes
|
||||
("By", "bytes"),
|
||||
("KiBy", "kibibytes"),
|
||||
("MiBy", "mebibytes"),
|
||||
("GiBy", "gibibytes"),
|
||||
("TiBy", "tibibytes"),
|
||||
("KBy", "kilobytes"),
|
||||
("MBy", "megabytes"),
|
||||
("GBy", "gigabytes"),
|
||||
("TBy", "terabytes"),
|
||||
// SI
|
||||
("m", "meters"),
|
||||
("V", "volts"),
|
||||
("A", "amperes"),
|
||||
("J", "joules"),
|
||||
("W", "watts"),
|
||||
("g", "grams"),
|
||||
// Misc
|
||||
("Cel", "celsius"),
|
||||
("Hz", "hertz"),
|
||||
("1", ""),
|
||||
("%", "percent"),
|
||||
].iter().map(|(k, v)| (k.to_string(), v.to_string())).collect();
|
||||
static ref PER_UNIT_MAP: HashMap<String, String> = [
|
||||
("s", "second"),
|
||||
("m", "minute"),
|
||||
("h", "hour"),
|
||||
("d", "day"),
|
||||
("w", "week"),
|
||||
("mo", "month"),
|
||||
("y", "year"),
|
||||
].iter().map(|(k, v)| (k.to_string(), v.to_string())).collect();
|
||||
}
|
||||
|
||||
const OTEL_SCOPE_NAME: &str = "name";
|
||||
@@ -80,7 +125,7 @@ const OTEL_SCOPE_SCHEMA_URL: &str = "schema_url";
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
pub fn to_grpc_insert_requests(
|
||||
request: ExportMetricsServiceRequest,
|
||||
metric_ctx: &OtlpMetricCtx,
|
||||
metric_ctx: &mut OtlpMetricCtx,
|
||||
) -> Result<(RowInsertRequests, usize)> {
|
||||
let mut table_writer = MultiTableData::default();
|
||||
|
||||
@@ -95,6 +140,13 @@ pub fn to_grpc_insert_requests(
|
||||
let scope_attrs = process_scope_attrs(scope, metric_ctx);
|
||||
|
||||
for metric in &scope.metrics {
|
||||
if metric.data.is_none() {
|
||||
continue;
|
||||
}
|
||||
if let Some(t) = metric.data.as_ref().map(from_metric_type) {
|
||||
metric_ctx.set_metric_type(t);
|
||||
}
|
||||
|
||||
encode_metrics(
|
||||
&mut table_writer,
|
||||
metric,
|
||||
@@ -109,6 +161,22 @@ pub fn to_grpc_insert_requests(
|
||||
Ok(table_writer.into_row_insert_requests())
|
||||
}
|
||||
|
||||
fn from_metric_type(data: &metric::Data) -> MetricType {
|
||||
match data {
|
||||
metric::Data::Gauge(_) => MetricType::Gauge,
|
||||
metric::Data::Sum(s) => {
|
||||
if s.is_monotonic {
|
||||
MetricType::MonotonicSum
|
||||
} else {
|
||||
MetricType::NonMonotonicSum
|
||||
}
|
||||
}
|
||||
metric::Data::Histogram(_) => MetricType::Histogram,
|
||||
metric::Data::ExponentialHistogram(_) => MetricType::ExponentialHistogram,
|
||||
metric::Data::Summary(_) => MetricType::Summary,
|
||||
}
|
||||
}
|
||||
|
||||
fn process_resource_attrs(attrs: &mut Vec<KeyValue>, metric_ctx: &OtlpMetricCtx) {
|
||||
if metric_ctx.is_legacy {
|
||||
return;
|
||||
@@ -181,10 +249,37 @@ fn process_scope_attrs(scope: &ScopeMetrics, metric_ctx: &OtlpMetricCtx) -> Opti
|
||||
})
|
||||
}
|
||||
|
||||
// replace . with _
|
||||
// see: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus
|
||||
pub fn normalize_metric_name(name: &str) -> String {
|
||||
let name = INVALID_METRIC_NAME.replace_all(name, UNDERSCORE);
|
||||
// See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/145942706622aba5c276ca47f48df438228bfea4/pkg/translator/prometheus/normalize_name.go#L55
|
||||
pub fn normalize_metric_name(metric: &Metric, metric_type: &MetricType) -> String {
|
||||
let mut name_tokens = NON_ALPHA_NUM_CHAR
|
||||
.split(&metric.name)
|
||||
.map(|s| s.to_string())
|
||||
.collect_vec();
|
||||
if !metric.unit.is_empty() {
|
||||
let (main, per) = build_unit_suffix(&metric.unit);
|
||||
if let Some(main) = main
|
||||
&& !name_tokens.contains(&main)
|
||||
{
|
||||
name_tokens.push(main);
|
||||
}
|
||||
if let Some(per) = per
|
||||
&& !name_tokens.contains(&per)
|
||||
{
|
||||
name_tokens.push("per".to_string());
|
||||
name_tokens.push(per);
|
||||
}
|
||||
}
|
||||
|
||||
if matches!(metric_type, MetricType::MonotonicSum) {
|
||||
name_tokens.retain(|t| t != TOTAL);
|
||||
name_tokens.push(TOTAL.to_string());
|
||||
}
|
||||
if metric.unit == "1" && matches!(metric_type, MetricType::Gauge) {
|
||||
name_tokens.retain(|t| t != RATIO);
|
||||
name_tokens.push(RATIO.to_string());
|
||||
}
|
||||
|
||||
let name = name_tokens.join(UNDERSCORE);
|
||||
|
||||
if let Some((_, first)) = name.char_indices().next()
|
||||
&& first >= '0'
|
||||
@@ -192,10 +287,50 @@ pub fn normalize_metric_name(name: &str) -> String {
|
||||
{
|
||||
format!("_{}", name)
|
||||
} else {
|
||||
name.to_string()
|
||||
name
|
||||
}
|
||||
}
|
||||
|
||||
fn build_unit_suffix(unit: &str) -> (Option<String>, Option<String>) {
|
||||
let (main, per) = unit.split_once('/').unwrap_or((unit, ""));
|
||||
(check_unit(main, &UNIT_MAP), check_unit(per, &PER_UNIT_MAP))
|
||||
}
|
||||
|
||||
fn check_unit(unit_str: &str, unit_map: &HashMap<String, String>) -> Option<String> {
|
||||
let u = unit_str.trim();
|
||||
if !u.is_empty() && !u.contains("{}") {
|
||||
let u = unit_map.get(u).map(|s| s.as_ref()).unwrap_or(u);
|
||||
let u = clean_unit_name(u);
|
||||
if !u.is_empty() {
|
||||
return Some(u);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn clean_unit_name(name: &str) -> String {
|
||||
NON_ALPHA_NUM_CHAR.split(name).join(UNDERSCORE)
|
||||
}
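
build_unit_suffix above splits a unit such as "m/s" on '/', maps each half through UNIT_MAP or PER_UNIT_MAP, and discards empty or placeholder units. A stripped-down sketch of that lookup with only a handful of units (the tables here are deliberately incomplete):

use std::collections::HashMap;

// Mirrors check_unit: skip empty or "{}" units, map known ones, keep unknown ones as-is.
fn check(unit: &str, map: &HashMap<&str, &str>) -> Option<String> {
    let u = unit.trim();
    if u.is_empty() || u.contains("{}") {
        return None;
    }
    let mapped = map.get(u).copied().unwrap_or(u);
    (!mapped.is_empty()).then(|| mapped.to_string())
}

fn unit_suffix(unit: &str) -> (Option<String>, Option<String>) {
    // Tiny subsets of UNIT_MAP and PER_UNIT_MAP, for illustration only.
    let unit_map = HashMap::from([("s", "seconds"), ("m", "meters"), ("By", "bytes"), ("1", "")]);
    let per_map = HashMap::from([("s", "second"), ("m", "minute")]);
    // "m/s" splits into the main unit "m" and the per-unit "s".
    let (main, per) = unit.split_once('/').unwrap_or((unit, ""));
    (check(main, &unit_map), check(per, &per_map))
}

fn main() {
    assert_eq!(
        unit_suffix("m/s"),
        (Some("meters".to_string()), Some("second".to_string()))
    );
    assert_eq!(unit_suffix("By"), (Some("bytes".to_string()), None));
    assert_eq!(unit_suffix("1"), (None, None)); // "1" maps to an empty suffix and is dropped
    println!("ok");
}

The per-unit half is what produces names like "foo_meters_per_second" in the tests further down.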
|
||||
|
||||
// See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/145942706622aba5c276ca47f48df438228bfea4/pkg/translator/prometheus/normalize_label.go#L27
|
||||
pub fn normalize_label_name(name: &str) -> String {
|
||||
if name.is_empty() {
|
||||
return name.to_string();
|
||||
}
|
||||
|
||||
let n = NON_ALPHA_NUM_CHAR.replace_all(name, UNDERSCORE);
|
||||
if let Some((_, first)) = n.char_indices().next()
|
||||
&& first >= '0'
|
||||
&& first <= '9'
|
||||
{
|
||||
return format!("key_{}", n);
|
||||
}
|
||||
if n.starts_with(UNDERSCORE) && !n.starts_with(DOUBLE_UNDERSCORE) {
|
||||
return format!("key{}", n);
|
||||
}
|
||||
n.to_string()
|
||||
}
|
||||
|
||||
/// Normalize otlp instrumentation, metric and attribute names
|
||||
///
|
||||
/// <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/api.md#instrument-name-syntax>
|
||||
@@ -216,7 +351,7 @@ fn encode_metrics(
|
||||
let name = if metric_ctx.is_legacy {
|
||||
legacy_normalize_otlp_name(&metric.name)
|
||||
} else {
|
||||
normalize_metric_name(&metric.name)
|
||||
normalize_metric_name(metric, &metric_ctx.metric_type)
|
||||
};
|
||||
|
||||
// note that we don't store description or unit, we might want to deal with
|
||||
@@ -296,10 +431,10 @@ fn write_attributes(
|
||||
.and_then(|val| {
|
||||
let key = match attribute_type {
|
||||
AttributeType::Resource | AttributeType::DataPoint => {
|
||||
normalize_metric_name(&attr.key)
|
||||
normalize_label_name(&attr.key)
|
||||
}
|
||||
AttributeType::Scope => {
|
||||
format!("otel_scope_{}", normalize_metric_name(&attr.key))
|
||||
format!("otel_scope_{}", normalize_label_name(&attr.key))
|
||||
}
|
||||
AttributeType::Legacy => legacy_normalize_otlp_name(&attr.key),
|
||||
};
|
||||
@@ -746,6 +881,181 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_metric_name() {
|
||||
let test_cases = vec![
|
||||
// Default case
|
||||
(Metric::default(), MetricType::Init, ""),
|
||||
// Basic metric with just name
|
||||
(
|
||||
Metric {
|
||||
name: "foo".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo",
|
||||
),
|
||||
// Metric with unit "s" should append "seconds"
|
||||
(
|
||||
Metric {
|
||||
name: "foo".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_seconds",
|
||||
),
|
||||
// Metric already ending with unit suffix should not duplicate
|
||||
(
|
||||
Metric {
|
||||
name: "foo_seconds".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_seconds",
|
||||
),
|
||||
// Monotonic sum should append "total"
|
||||
(
|
||||
Metric {
|
||||
name: "foo".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_total",
|
||||
),
|
||||
// Metric already ending with "total" should not duplicate
|
||||
(
|
||||
Metric {
|
||||
name: "foo_total".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_total",
|
||||
),
|
||||
// Monotonic sum with unit should append both unit and "total"
|
||||
(
|
||||
Metric {
|
||||
name: "foo".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_seconds_total",
|
||||
),
|
||||
// Metric with unit suffix and monotonic sum
|
||||
(
|
||||
Metric {
|
||||
name: "foo_seconds".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_seconds_total",
|
||||
),
|
||||
// Metric already ending with "total" and has unit
|
||||
(
|
||||
Metric {
|
||||
name: "foo_total".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_seconds_total",
|
||||
),
|
||||
// Metric already ending with both unit and "total"
|
||||
(
|
||||
Metric {
|
||||
name: "foo_seconds_total".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_seconds_total",
|
||||
),
|
||||
// Metric with unusual order (total_seconds) should be normalized
|
||||
(
|
||||
Metric {
|
||||
name: "foo_total_seconds".to_string(),
|
||||
unit: "s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::MonotonicSum,
|
||||
"foo_seconds_total",
|
||||
),
|
||||
// Gauge with unit "1" should append "ratio"
|
||||
(
|
||||
Metric {
|
||||
name: "foo".to_string(),
|
||||
unit: "1".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Gauge,
|
||||
"foo_ratio",
|
||||
),
|
||||
// Complex unit like "m/s" should be converted to "meters_per_second"
|
||||
(
|
||||
Metric {
|
||||
name: "foo".to_string(),
|
||||
unit: "m/s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_meters_per_second",
|
||||
),
|
||||
// Metric with partial unit match
|
||||
(
|
||||
Metric {
|
||||
name: "foo_second".to_string(),
|
||||
unit: "m/s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_second_meters",
|
||||
),
|
||||
// Metric already containing the main unit
|
||||
(
|
||||
Metric {
|
||||
name: "foo_meters".to_string(),
|
||||
unit: "m/s".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
MetricType::Init,
|
||||
"foo_meters_per_second",
|
||||
),
|
||||
];
|
||||
|
||||
for (metric, metric_type, expected) in test_cases {
|
||||
let result = normalize_metric_name(&metric, &metric_type);
|
||||
assert_eq!(
|
||||
result, expected,
|
||||
"Failed for metric name: '{}', unit: '{}', type: {:?}",
|
||||
metric.name, metric.unit, metric_type
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_label_name() {
|
||||
let test_cases = vec![
|
||||
("", ""),
|
||||
("foo", "foo"),
|
||||
("foo_bar/baz:abc", "foo_bar_baz_abc"),
|
||||
("1foo", "key_1foo"),
|
||||
("_foo", "key_foo"),
|
||||
("__bar", "__bar"),
|
||||
];
|
||||
|
||||
for (input, expected) in test_cases {
|
||||
let result = normalize_label_name(input);
|
||||
assert_eq!(
|
||||
result, expected,
|
||||
"unexpected result for input '{}'; got '{}'; want '{}'",
|
||||
input, result, expected
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn keyvalue(key: &str, value: &str) -> KeyValue {
|
||||
KeyValue {
|
||||
key: key.into(),
|
||||
@@ -1023,12 +1333,4 @@ mod tests {
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_normalize_otlp_name() {
|
||||
assert_eq!(normalize_metric_name("test.123"), "test_123");
|
||||
assert_eq!(normalize_metric_name("test_123"), "test_123");
|
||||
assert_eq!(normalize_metric_name("test._123"), "test__123");
|
||||
assert_eq!(normalize_metric_name("123_test"), "_123_test");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use std::collections::BTreeMap;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use api::prom_store::remote::label_matcher::Type as MatcherType;
|
||||
use api::prom_store::remote::{Label, Query, Sample, TimeSeries, WriteRequest};
|
||||
use api::prom_store::remote::{Label, Query, ReadRequest, Sample, TimeSeries, WriteRequest};
|
||||
use api::v1::RowInsertRequests;
|
||||
use common_grpc::precision::Precision;
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
@@ -44,6 +44,9 @@ pub const METRIC_NAME_LABEL_BYTES: &[u8] = b"__name__";
|
||||
pub const DATABASE_LABEL: &str = "__database__";
|
||||
pub const DATABASE_LABEL_BYTES: &[u8] = b"__database__";
|
||||
|
||||
pub const SCHEMA_LABEL: &str = "__schema__";
|
||||
pub const SCHEMA_LABEL_BYTES: &[u8] = b"__schema__";
|
||||
|
||||
pub const PHYSICAL_TABLE_LABEL: &str = "__physical_table__";
|
||||
pub const PHYSICAL_TABLE_LABEL_BYTES: &[u8] = b"__physical_table__";
|
||||
|
||||
@@ -73,6 +76,29 @@ pub fn table_name(q: &Query) -> Result<String> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract schema from remote read request. Returns the first schema found from any query's matchers.
|
||||
/// Prioritizes __schema__ over __database__ labels.
|
||||
pub fn extract_schema_from_read_request(request: &ReadRequest) -> Option<String> {
|
||||
for query in &request.queries {
|
||||
for matcher in &query.matchers {
|
||||
if matcher.name == SCHEMA_LABEL && matcher.r#type == MatcherType::Eq as i32 {
|
||||
return Some(matcher.value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no __schema__ found, look for __database__
|
||||
for query in &request.queries {
|
||||
for matcher in &query.matchers {
|
||||
if matcher.name == DATABASE_LABEL && matcher.r#type == MatcherType::Eq as i32 {
|
||||
return Some(matcher.value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
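
extract_schema_from_read_request above gives an equality matcher on __schema__ priority over __database__ by scanning all queries for __schema__ first. A self-contained sketch of the same precedence with simplified matcher types (the real ones are the Prometheus remote-read protobuf messages):

// Simplified stand-ins for the remote-read protobuf types.
struct Matcher {
    name: String,
    value: String,
    is_eq: bool,
}

struct Query {
    matchers: Vec<Matcher>,
}

fn extract_schema(queries: &[Query]) -> Option<String> {
    // Iterating the labels in priority order reproduces the two-pass precedence.
    for label in ["__schema__", "__database__"] {
        for query in queries {
            for m in &query.matchers {
                if m.is_eq && m.name == label {
                    return Some(m.value.clone());
                }
            }
        }
    }
    None
}

fn main() {
    let queries = vec![Query {
        matchers: vec![
            Matcher { name: "__database__".into(), value: "db_a".into(), is_eq: true },
            Matcher { name: "__schema__".into(), value: "db_b".into(), is_eq: true },
        ],
    }];
    // __schema__ wins even though __database__ appears first in the matcher list.
    assert_eq!(extract_schema(&queries), Some("db_b".to_string()));
    println!("ok");
}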
|
||||
|
||||
/// Create a DataFrame from a remote Query
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub fn query_to_plan(dataframe: DataFrame, q: &Query) -> Result<LogicalPlan> {
|
||||
@@ -91,7 +117,7 @@ pub fn query_to_plan(dataframe: DataFrame, q: &Query) -> Result<LogicalPlan> {
|
||||
for m in label_matches {
|
||||
let name = &m.name;
|
||||
|
||||
if name == METRIC_NAME_LABEL {
|
||||
if name == METRIC_NAME_LABEL || name == SCHEMA_LABEL || name == DATABASE_LABEL {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ use crate::http::PromValidationMode;
|
||||
use crate::pipeline::run_pipeline;
|
||||
use crate::prom_row_builder::{PromCtx, TablesBuilder};
|
||||
use crate::prom_store::{
|
||||
DATABASE_LABEL_BYTES, METRIC_NAME_LABEL_BYTES, PHYSICAL_TABLE_LABEL_BYTES,
|
||||
DATABASE_LABEL_BYTES, METRIC_NAME_LABEL_BYTES, PHYSICAL_TABLE_LABEL_BYTES, SCHEMA_LABEL_BYTES,
|
||||
};
|
||||
use crate::query_handler::PipelineHandlerRef;
|
||||
use crate::repeated_field::{Clear, RepeatedField};
|
||||
@@ -201,10 +201,17 @@ impl PromTimeSeries {
|
||||
self.table_name = prom_validation_mode.decode_string(&label.value)?;
|
||||
self.labels.truncate(self.labels.len() - 1); // remove last label
|
||||
}
|
||||
DATABASE_LABEL_BYTES => {
|
||||
SCHEMA_LABEL_BYTES => {
|
||||
self.schema = Some(prom_validation_mode.decode_string(&label.value)?);
|
||||
self.labels.truncate(self.labels.len() - 1); // remove last label
|
||||
}
|
||||
DATABASE_LABEL_BYTES => {
|
||||
// Only set schema from __database__ if __schema__ hasn't been set yet
|
||||
if self.schema.is_none() {
|
||||
self.schema = Some(prom_validation_mode.decode_string(&label.value)?);
|
||||
}
|
||||
self.labels.truncate(self.labels.len() - 1); // remove last label
|
||||
}
|
||||
PHYSICAL_TABLE_LABEL_BYTES => {
|
||||
self.physical_table =
|
||||
Some(prom_validation_mode.decode_string(&label.value)?);
|
||||
|
||||
@@ -62,6 +62,8 @@ pub struct TlsOption {
|
||||
#[serde(default)]
|
||||
pub key_path: String,
|
||||
#[serde(default)]
|
||||
pub ca_cert_path: String,
|
||||
#[serde(default)]
|
||||
pub watch: bool,
|
||||
}
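
The new ca_cert_path field above is marked #[serde(default)], so configuration files written before the field existed keep deserializing. A minimal sketch of that backward-compatible addition, assuming the serde (with derive) and toml crates:

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct TlsOption {
    #[serde(default)]
    cert_path: String,
    #[serde(default)]
    key_path: String,
    // Newly added field; `default` keeps old configs without it valid.
    #[serde(default)]
    ca_cert_path: String,
    #[serde(default)]
    watch: bool,
}

fn main() {
    // An old-style config that predates `ca_cert_path` still parses.
    let old = r#"
        cert_path = "/path/to/cert"
        key_path = "/path/to/key"
    "#;
    let opts: TlsOption = toml::from_str(old).unwrap();
    assert_eq!(opts.ca_cert_path, "");
    println!("{:?}", opts);
}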
|
||||
|
||||
@@ -253,6 +255,7 @@ mod tests {
|
||||
mode: Disable,
|
||||
cert_path: "/path/to/cert_path".to_string(),
|
||||
key_path: "/path/to/key_path".to_string(),
|
||||
ca_cert_path: String::new(),
|
||||
watch: false
|
||||
},
|
||||
TlsOption::new(
|
||||
@@ -413,6 +416,7 @@ mod tests {
|
||||
.into_os_string()
|
||||
.into_string()
|
||||
.expect("failed to convert path to string"),
|
||||
ca_cert_path: String::new(),
|
||||
watch: true,
|
||||
};
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.