Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-27 08:29:59 +00:00)
Compare commits
32 commits: v0.7.1-1...v0.8.0-nig
| SHA1 |
|---|
| 2b2fd80bf4 |
| 24886b9530 |
| 8345f1753c |
| 3420a010e6 |
| 9f020aa414 |
| c9ac72e7f8 |
| 86fb9d8ac7 |
| 1f0fc40287 |
| 8b7a5aaa4a |
| 856a4e1e4f |
| 39b69f1e3b |
| bbcdb28b7c |
| 6377982501 |
| ddbcff68dd |
| 5b315c2d40 |
| 9816d2a08b |
| a99d6eb3f9 |
| 2c115bc22a |
| 641592644d |
| fa0f3555d4 |
| 3cad844acd |
| cf25cf984b |
| 3acd5bfad0 |
| 343525dab8 |
| 0afac58e4d |
| 393ea44de0 |
| 44731fd653 |
| d36a5a74d3 |
| 74862f8c3f |
| a52aedec5b |
| b6fac619a6 |
| a29e7ebb7d |
21 .github/workflows/unassign.yml vendored Normal file
@@ -0,0 +1,21 @@
name: Auto Unassign
on:
  schedule:
    - cron: '4 2 * * *'
  workflow_dispatch:

permissions:
  contents: read
  issues: write
  pull-requests: write

jobs:
  auto-unassign:
    name: Auto Unassign
    runs-on: ubuntu-latest
    steps:
      - name: Auto Unassign
        uses: tisonspieces/auto-unassign@main
        with:
          token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          repository: ${{ github.repository }}
15 Cargo.lock generated
@@ -863,6 +863,12 @@ version = "0.21.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"

[[package]]
name = "base64"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51"

[[package]]
name = "base64ct"
version = "1.6.0"
@@ -3482,6 +3488,7 @@ dependencies = [
 "serde_json",
 "servers",
 "session",
 "smallvec",
 "snafu",
 "tokio",
 "tonic 0.10.2",
@@ -3863,7 +3870,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=96f1f0404f421ee560a4310c73c5071e49168168#96f1f0404f421ee560a4310c73c5071e49168168"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=349cb385583697f41010dabeb3c106d58f9599b4#349cb385583697f41010dabeb3c106d58f9599b4"
dependencies = [
 "prost 0.12.3",
 "serde",
@@ -6696,12 +6703,12 @@ dependencies = [

[[package]]
name = "pgwire"
version = "0.19.1"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17780c93587822c191c3f4d43fa5f6bc6df1e51b9f58a0be0cd1b7fd6e80d9e6"
checksum = "c00492c52bb65e0421211b7f4c5d9de7586e53786a3b244efb00f74851206bf6"
dependencies = [
 "async-trait",
 "base64 0.21.5",
 "base64 0.22.0",
 "bytes",
 "chrono",
 "derive-new 0.6.0",
Cargo.toml
@@ -103,7 +103,7 @@ etcd-client = "0.12"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96f1f0404f421ee560a4310c73c5071e49168168" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "349cb385583697f41010dabeb3c106d58f9599b4" }
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
219 README.md
@@ -6,145 +6,154 @@
</picture>
</p>

<h1 align="center">Cloud-scale, Fast and Efficient Time Series Database</h1>

<div align="center">
<h3 align="center">
The next-generation hybrid time-series/analytics processing database in the cloud
</h3>
<a href="https://greptime.com/product/cloud">GreptimeCloud</a> |
<a href="https://docs.greptime.com/">User guide</a> |
<a href="https://greptimedb.rs/">API Docs</a> |
<a href="https://github.com/GreptimeTeam/greptimedb/issues/3412">Roadmap 2024</a>
</h4>

<p align="center">
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C"></img></a>
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml"><img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="CI"></img></a>
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
</p>
<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
<img src="https://img.shields.io/github/v/release/GreptimeTeam/greptimedb.svg" alt="Version"/>
</a>
<a href="https://github.com/GreptimeTeam/greptimedb/releases/latest">
<img src="https://img.shields.io/github/release-date/GreptimeTeam/greptimedb.svg" alt="Releases"/>
</a>
<a href="https://hub.docker.com/r/greptime/greptimedb/">
<img src="https://img.shields.io/docker/pulls/greptime/greptimedb.svg" alt="Docker Pulls"/>
</a>
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml">
<img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="GitHub Actions"/>
</a>
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb">
<img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C" alt="Codecov"/>
</a>
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE">
<img src="https://img.shields.io/github/license/greptimeTeam/greptimedb" alt="License"/>
</a>

<p align="center">
<a href="https://twitter.com/greptime"><img src="https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg"></a>
<a href="https://www.linkedin.com/company/greptime/"><img src="https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg"></a>
<a href="https://greptime.com/slack"><img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack" alt="slack" /></a>
</p>
<br/>

## What is GreptimeDB
<a href="https://greptime.com/slack">
<img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack&style=for-the-badge" alt="Slack"/>
</a>
<a href="https://twitter.com/greptime">
<img src="https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg?style=for-the-badge" alt="Twitter"/>
</a>
<a href="https://www.linkedin.com/company/greptime/">
<img src="https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg?style=for-the-badge" alt="LinkedIn"/>
</a>
</div>

GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
## Introduction

Our core developers have been building time-series data platforms for years. Based on their best practices, GreptimeDB is born to give you:
**GreptimeDB** is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
Designed to work on infrastructure of the cloud era, GreptimeDB benefits users with its elasticity and commodity storage, offering a fast and cost-effective **alternative to InfluxDB** and a **long-term storage for Prometheus**.

- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
- Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
- Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
- Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
- Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high cardinality issues down.
- Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc.
## Why GreptimeDB

## Quick Start
Our core developers have been building time-series data platforms for years. Based on our best practices, GreptimeDB is born to give you:

### [GreptimePlay](https://greptime.com/playground)
* **Easy horizontal scaling**

  Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.

* **Analyzing time-series data**

  Query your time-series data with SQL and PromQL. Use Python scripts to facilitate complex analytical tasks.

* **Cloud-native distributed database**

  Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.

* **Performance and Cost-effective**

  Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high cardinality issues down. Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.

* **Compatible with InfluxDB, Prometheus and more protocols**

  Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).

## Try GreptimeDB

### 1. [GreptimePlay](https://greptime.com/playground)

Try out the features of GreptimeDB right from your browser.

### Build
### 2. [GreptimeCloud](https://console.greptime.cloud/)

#### Build from Source
Start instantly with a free cluster.

To compile GreptimeDB from source, you'll need:
### 3. Docker Image

- C/C++ Toolchain: provides basic tools for compiling and linking. This is
available as `build-essential` on ubuntu and under a similar name on other platforms.
- Rust: the easiest way to install Rust is to use
[`rustup`](https://rustup.rs/), which will check our `rust-toolchain` file and
install the correct Rust version for you.
- Protobuf: `protoc` is required for compiling `.proto` files. `protobuf` is
available from major package managers on macos and linux distributions. You can
find installation instructions [here](https://grpc.io/docs/protoc-installation/).
**Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
keyword. You can check it with `protoc --version`.
- python3-dev or python3-devel (optional feature, only needed if you want to run scripts
in CPython; you also need to enable the `pyo3_backend` feature when compiling (by `cargo run -F pyo3_backend`, or by adding `pyo3_backend` to src/script/Cargo.toml's `features.default`, like `default = ["python", "pyo3_backend"]`)): this installs a Python shared library required for running the Python
scripting engine (in CPython mode). This is available as `python3-dev` on
ubuntu, you can install it with `sudo apt install python3-dev`, or
`python3-devel` on RPM based distributions (e.g. Fedora, Red Hat, SuSE). Mac's
`Python3` package should have this shared library by default. More detail for compiling with PyO3 can be found in [PyO3](https://pyo3.rs/v0.18.1/building_and_distribution#configuring-the-python-version)'s documentation.
To install GreptimeDB locally, the recommended way is via Docker:

#### Build with Docker

A docker image with necessary dependencies is provided:

```
docker build --network host -f docker/Dockerfile -t greptimedb .
```

```shell
docker pull greptime/greptimedb
```

### Run

Start GreptimeDB from source code, in standalone mode:
Start a GreptimeDB container with:

```shell
docker run --rm --name greptime --net=host greptime/greptimedb standalone start
```

Read more about [Installation](https://docs.greptime.com/getting-started/installation/overview) on docs.

## Getting Started

* [Quickstart](https://docs.greptime.com/getting-started/quick-start/overview)
* [Write Data](https://docs.greptime.com/user-guide/clients/overview)
* [Query Data](https://docs.greptime.com/user-guide/query-data/overview)
* [Operations](https://docs.greptime.com/user-guide/operations/overview)

## Build

Check the prerequisite:

* [Rust toolchain](https://www.rust-lang.org/tools/install) (nightly)
* [Protobuf compiler](https://grpc.io/docs/protoc-installation/) (>= 3.15)
* Python toolchain (optional): Required only if built with PyO3 backend. More detail for compiling with PyO3 can be found in its [documentation](https://pyo3.rs/v0.18.1/building_and_distribution#configuring-the-python-version).

Build GreptimeDB binary:

```shell
make
```

Run a standalone server:

```shell
cargo run -- standalone start
```

Or if you built from docker:

```
docker run -p 4002:4002 -v "$(pwd):/tmp/greptimedb" greptime/greptimedb standalone start
```

Please see the online document site for more installation options and [operations info](https://docs.greptime.com/user-guide/operations/overview).

### Get started

Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview) on our [official document site](https://docs.greptime.com/).

To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview).

## Resources

### Installation

- [Pre-built Binaries](https://greptime.com/download):
For Linux and macOS, you can easily download pre-built binaries including official releases and nightly builds that are ready to use.
In most cases, downloading the version without PyO3 is sufficient. However, if you plan to run scripts in CPython (and use Python packages like NumPy and Pandas), you will need to download the version with PyO3 and install a Python with the same version as the Python in the PyO3 version.
We recommend using virtualenv for the installation process to manage multiple Python versions.
- [Docker Images](https://hub.docker.com/r/greptime/greptimedb) (**recommended**): pre-built
Docker images, this is the easiest way to try GreptimeDB. By default it runs CPython scripts with `pyo3_backend` enabled.
- [`gtctl`](https://github.com/GreptimeTeam/gtctl): the command-line tool for
Kubernetes deployment

### Documentation

- GreptimeDB [User Guide](https://docs.greptime.com/user-guide/concepts/overview)
- GreptimeDB [Developer
Guide](https://docs.greptime.com/developer-guide/overview.html)
- GreptimeDB [internal code document](https://greptimedb.rs)
## Extension

### Dashboard

- [The dashboard UI for GreptimeDB](https://github.com/GreptimeTeam/dashboard)

### SDK

- [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp)
- [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl)
- [GreptimeDB Go Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-go)
- [GreptimeDB Java Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-java)
- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)
- [GreptimeDB JavaScript Client](https://github.com/GreptimeTeam/greptime-js-sdk)
- [GreptimeDB C++ Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-cpp)
- [GreptimeDB Erlang Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-erl)
- [GreptimeDB Rust Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-rust)
- [GreptimeDB JavaScript Ingester](https://github.com/GreptimeTeam/greptime-ingester-js)

### Grafana Dashboard

Our official Grafana dashboard is available at [grafana](./grafana/README.md) directory.
Our official Grafana dashboard is available at [grafana](grafana/README.md) directory.

## Project Status

This project is in its early stage and under heavy development. We move fast and
break things. Benchmarks on the development branch may not represent its potential
performance. We release pre-built binaries constantly for functional
evaluation. Do not use it in production at the moment.

For future plans, check out the [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/669).
The current version has not yet reached General Availability version standards.
In line with our Greptime 2024 Roadmap, we plan to achieve a production-level
version with the update to v1.0 in August. [[Join Force]](https://github.com/GreptimeTeam/greptimedb/issues/3412)

## Community
@@ -154,12 +163,12 @@ and what went wrong. If you have any questions or if you would like to get involved in our
community, please check out:

- GreptimeDB Community on [Slack](https://greptime.com/slack)
- GreptimeDB GitHub [Discussions](https://github.com/GreptimeTeam/greptimedb/discussions)
- Greptime official [Website](https://greptime.com)
- GreptimeDB [GitHub Discussions forum](https://github.com/GreptimeTeam/greptimedb/discussions)
- Greptime official [website](https://greptime.com)

In addition, you may:

- View our official [Blog](https://greptime.com/blogs/index)
- View our official [Blog](https://greptime.com/blogs/)
- Connect us with [Linkedin](https://www.linkedin.com/company/greptime/)
- Follow us on [Twitter](https://twitter.com/greptime)

@@ -170,7 +179,7 @@ open contributions and allowing you to use the software however you want.

## Contributing

Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
Please refer to [contribution guidelines](CONTRIBUTING.md) and [internal concepts docs](https://docs.greptime.com/contributor-guide/overview.html) for more information.

## Acknowledgement
@@ -140,9 +140,9 @@ intermediate_path = ""

[region_engine.mito.memtable]
# Memtable type.
# - "experimental": experimental memtable
# - "partition_tree": partition tree memtable
# - "time_series": time-series memtable (deprecated)
type = "experimental"
type = "partition_tree"
# The max number of keys in one shard.
index_max_keys_per_shard = 8192
# The max rows of data inside the actively writing buffer in one shard.

@@ -246,9 +246,9 @@ intermediate_path = ""

[region_engine.mito.memtable]
# Memtable type.
# - "experimental": experimental memtable
# - "partition_tree": partition tree memtable
# - "time_series": time-series memtable (deprecated)
type = "experimental"
type = "partition_tree"
# The max number of keys in one shard.
index_max_keys_per_shard = 8192
# The max rows of data inside the actively writing buffer in one shard.
@@ -27,7 +27,7 @@ function retry_fetch() {
    echo "Failed to download $url"
    echo "You may try to set http_proxy and https_proxy environment variables."
    if [[ -z "$GITHUB_PROXY_URL" ]]; then
      echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/"
      echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/https://github.com/"
    fi
    exit 1
}

@@ -39,7 +39,7 @@ function retry_fetch() {
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/sha256.txt" sha256.txt

# Download the tar file containing the built dashboard assets.
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz" build.tar.gz
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/build.tar.gz" build.tar.gz

# Verify the checksums match; exit if they don't.
case "$(uname -s)" in
@@ -707,7 +707,6 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
}

pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
    // TODO(fys): use macros to optimize code
    match data_type {
        ConcreteDataType::Int64(_) => values
            .i64_values
@@ -40,7 +40,7 @@ pub fn user_provider_from_option(opt: &String) -> Result<UserProviderRef> {
    match name {
        STATIC_USER_PROVIDER => {
            let provider =
                StaticUserProvider::try_from(content).map(|p| Arc::new(p) as UserProviderRef)?;
                StaticUserProvider::new(content).map(|p| Arc::new(p) as UserProviderRef)?;
            Ok(provider)
        }
        _ => InvalidConfigSnafu {

@@ -23,7 +23,7 @@ use secrecy::ExposeSecret;
use snafu::{ensure, OptionExt, ResultExt};

use crate::error::{
    Error, IllegalParamSnafu, InvalidConfigSnafu, IoSnafu, Result, UnsupportedPasswordTypeSnafu,
    IllegalParamSnafu, InvalidConfigSnafu, IoSnafu, Result, UnsupportedPasswordTypeSnafu,
    UserNotFoundSnafu, UserPasswordMismatchSnafu,
};
use crate::user_info::DefaultUserInfo;
@@ -31,10 +31,12 @@ use crate::{auth_mysql, Identity, Password, UserInfoRef, UserProvider};

pub(crate) const STATIC_USER_PROVIDER: &str = "static_user_provider";

impl TryFrom<&str> for StaticUserProvider {
    type Error = Error;
pub(crate) struct StaticUserProvider {
    users: HashMap<String, Vec<u8>>,
}

    fn try_from(value: &str) -> Result<Self> {
impl StaticUserProvider {
    pub(crate) fn new(value: &str) -> Result<Self> {
        let (mode, content) = value.split_once(':').context(InvalidConfigSnafu {
            value: value.to_string(),
            msg: "StaticUserProviderOption must be in format `<option>:<value>`",
@@ -83,15 +85,11 @@ impl TryFrom<&str> for StaticUserProvider {
            value: mode.to_string(),
            msg: "StaticUserProviderOption must be in format `file:<path>` or `cmd:<values>`",
        }
        .fail(),
        };
    }
}

pub(crate) struct StaticUserProvider {
    users: HashMap<String, Vec<u8>>,
}

#[async_trait]
impl UserProvider for StaticUserProvider {
    fn name(&self) -> &str {
@@ -181,7 +179,7 @@ pub mod test {
    #[tokio::test]
    async fn test_authorize() {
        let user_info = DefaultUserInfo::with_name("root");
        let provider = StaticUserProvider::try_from("cmd:root=123456,admin=654321").unwrap();
        let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
        provider
            .authorize("catalog", "schema", &user_info)
            .await
@@ -190,7 +188,7 @@ pub mod test {

    #[tokio::test]
    async fn test_inline_provider() {
        let provider = StaticUserProvider::try_from("cmd:root=123456,admin=654321").unwrap();
        let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
        test_authenticate(&provider, "root", "123456").await;
        test_authenticate(&provider, "admin", "654321").await;
    }
@@ -214,7 +212,7 @@ admin=654321",
    }

    let param = format!("file:{file_path}");
    let provider = StaticUserProvider::try_from(param.as_str()).unwrap();
    let provider = StaticUserProvider::new(param.as_str()).unwrap();
    test_authenticate(&provider, "root", "123456").await;
    test_authenticate(&provider, "admin", "654321").await;
}
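The change above replaces the `TryFrom<&str>` implementation with an inherent `StaticUserProvider::new` constructor, which reads more directly at call sites and drops the now-unused `Error` associated type. A minimal sketch of the constructor pattern, assuming simplified types: the real provider also hashes passwords and supports a `file:<path>` mode, and the error type here is a plain `String` stand-in.

```rust
use std::collections::HashMap;

struct StaticUserProvider {
    users: HashMap<String, Vec<u8>>,
}

impl StaticUserProvider {
    /// Parses options of the form `cmd:user1=pwd1,user2=pwd2` (sketch only).
    fn new(value: &str) -> Result<Self, String> {
        let (mode, content) = value
            .split_once(':')
            .ok_or("StaticUserProviderOption must be in format `<option>:<value>`")?;
        match mode {
            "cmd" => {
                // Collect `user=password` pairs into the in-memory user map.
                let users = content
                    .split(',')
                    .filter_map(|kv| kv.split_once('='))
                    .map(|(user, pwd)| (user.to_string(), pwd.as_bytes().to_vec()))
                    .collect();
                Ok(Self { users })
            }
            other => Err(format!("unsupported mode: {other}")),
        }
    }
}

fn main() {
    let provider = StaticUserProvider::new("cmd:root=123456,admin=654321").unwrap();
    assert_eq!(provider.users.len(), 2);
}
```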
@@ -25,13 +25,13 @@ use common_catalog::format_full_table_name;
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
use common_meta::error::Result as MetaResult;
use common_meta::instruction::CacheIdent;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::schema_name::SchemaNameKey;
use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::table_name::TableName;
use futures_util::stream::BoxStream;
use futures_util::{StreamExt, TryStreamExt};
use moka::future::{Cache as AsyncCache, CacheBuilder};
@@ -39,7 +39,6 @@ use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::metadata::TableId;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::TableRef;

@@ -79,24 +78,18 @@ fn make_table(table_info_value: TableInfoValue) -> CatalogResult<TableRef> {

#[async_trait::async_trait]
impl CacheInvalidator for KvBackendCatalogManager {
    async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
        self.cache_invalidator
            .invalidate_table_id(ctx, table_id)
            .await
    }

    async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
        let table_cache_key = format_full_table_name(
            &table_name.catalog_name,
            &table_name.schema_name,
            &table_name.table_name,
        );
        self.cache_invalidator
            .invalidate_table_name(ctx, table_name)
            .await?;
        self.table_cache.invalidate(&table_cache_key).await;

        Ok(())
    async fn invalidate(&self, ctx: &Context, caches: Vec<CacheIdent>) -> MetaResult<()> {
        for cache in &caches {
            if let CacheIdent::TableName(table_name) = cache {
                let table_cache_key = format_full_table_name(
                    &table_name.catalog_name,
                    &table_name.schema_name,
                    &table_name.table_name,
                );
                self.table_cache.invalidate(&table_cache_key).await;
            }
        }
        self.cache_invalidator.invalidate(ctx, caches).await
    }
}
@@ -14,7 +14,7 @@

use std::sync::Arc;

use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use api::v1::region::{QueryRequest, RegionRequest};
use api::v1::ResponseHeader;
use arc_swap::ArcSwapOption;
use arrow_flight::Ticket;
@@ -23,7 +23,7 @@ use async_trait::async_trait;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_meta::datanode_manager::{AffectedRows, Datanode};
use common_meta::datanode_manager::{Datanode, HandleResponse};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
@@ -46,7 +46,7 @@ pub struct RegionRequester {

#[async_trait]
impl Datanode for RegionRequester {
    async fn handle(&self, request: RegionRequest) -> MetaResult<AffectedRows> {
    async fn handle(&self, request: RegionRequest) -> MetaResult<HandleResponse> {
        self.handle_inner(request).await.map_err(|err| {
            if err.should_retry() {
                meta_error::Error::RetryLater {
@@ -165,7 +165,7 @@ impl RegionRequester {
        Ok(Box::pin(record_batch_stream))
    }

    async fn handle_inner(&self, request: RegionRequest) -> Result<AffectedRows> {
    async fn handle_inner(&self, request: RegionRequest) -> Result<HandleResponse> {
        let request_type = request
            .body
            .as_ref()
@@ -178,10 +178,7 @@ impl RegionRequester {

        let mut client = self.client.raw_region_client()?;

        let RegionResponse {
            header,
            affected_rows,
        } = client
        let response = client
            .handle(request)
            .await
            .map_err(|e| {
@@ -195,19 +192,20 @@ impl RegionRequester {
            })?
            .into_inner();

        check_response_header(header)?;
        check_response_header(&response.header)?;

        Ok(affected_rows as _)
        Ok(HandleResponse::from_region_response(response))
    }

    pub async fn handle(&self, request: RegionRequest) -> Result<AffectedRows> {
    pub async fn handle(&self, request: RegionRequest) -> Result<HandleResponse> {
        self.handle_inner(request).await
    }
}

pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
pub fn check_response_header(header: &Option<ResponseHeader>) -> Result<()> {
    let status = header
        .and_then(|header| header.status)
        .as_ref()
        .and_then(|header| header.status.as_ref())
        .context(IllegalDatabaseResponseSnafu {
            err_msg: "either response header or status is missing",
        })?;
@@ -221,7 +219,7 @@ pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
        })?;
    ServerSnafu {
        code,
        msg: status.err_msg,
        msg: status.err_msg.clone(),
    }
    .fail()
}
@@ -236,19 +234,19 @@ mod test {

    #[test]
    fn test_check_response_header() {
        let result = check_response_header(None);
        let result = check_response_header(&None);
        assert!(matches!(
            result.unwrap_err(),
            IllegalDatabaseResponse { .. }
        ));

        let result = check_response_header(Some(ResponseHeader { status: None }));
        let result = check_response_header(&Some(ResponseHeader { status: None }));
        assert!(matches!(
            result.unwrap_err(),
            IllegalDatabaseResponse { .. }
        ));

        let result = check_response_header(Some(ResponseHeader {
        let result = check_response_header(&Some(ResponseHeader {
            status: Some(PbStatus {
                status_code: StatusCode::Success as u32,
                err_msg: String::default(),
@@ -256,7 +254,7 @@ mod test {
        }));
        assert!(result.is_ok());

        let result = check_response_header(Some(ResponseHeader {
        let result = check_response_header(&Some(ResponseHeader {
            status: Some(PbStatus {
                status_code: u32::MAX,
                err_msg: String::default(),
@@ -267,7 +265,7 @@ mod test {
            IllegalDatabaseResponse { .. }
        ));

        let result = check_response_header(Some(ResponseHeader {
        let result = check_response_header(&Some(ResponseHeader {
            status: Some(PbStatus {
                status_code: StatusCode::Internal as u32,
                err_msg: "blabla".to_string(),
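The signature change from `Option<ResponseHeader>` to `&Option<ResponseHeader>` means the header is only borrowed for validation, so the caller still owns the whole response afterwards and can hand it to `HandleResponse::from_region_response`. A standalone sketch of the same pattern, with the types simplified and the real Snafu-based error handling replaced by a `String`:

```rust
#[derive(Debug)]
struct ResponseHeader {
    status: Option<u32>,
}

#[derive(Debug)]
struct RegionResponse {
    header: Option<ResponseHeader>,
    affected_rows: u64,
}

// Borrowing the header leaves `response` intact for later use.
fn check_response_header(header: &Option<ResponseHeader>) -> Result<(), String> {
    let status = header
        .as_ref()
        .and_then(|h| h.status.as_ref())
        .ok_or("either response header or status is missing")?;
    if *status == 0 {
        Ok(())
    } else {
        Err(format!("server returned status {status}"))
    }
}

fn main() {
    let response = RegionResponse {
        header: Some(ResponseHeader { status: Some(0) }),
        affected_rows: 3,
    };
    check_response_header(&response.header).unwrap();
    // `response` was not consumed by the check, which is the point of the change.
    println!("affected rows: {}", response.affected_rows);
}
```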
@@ -18,6 +18,7 @@ use async_trait::async_trait;
use common_base::AffectedRows;
use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
use common_query::error::Result;
use common_query::Output;
use session::context::QueryContextRef;
use store_api::storage::RegionId;
use table::requests::{CompactTableRequest, DeleteRequest, FlushTableRequest, InsertRequest};
@@ -26,7 +27,7 @@ use table::requests::{CompactTableRequest, DeleteRequest, FlushTableRequest, Ins
#[async_trait]
pub trait TableMutationHandler: Send + Sync {
    /// Inserts rows into the table.
    async fn insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<AffectedRows>;
    async fn insert(&self, request: InsertRequest, ctx: QueryContextRef) -> Result<Output>;

    /// Delete rows from the table.
    async fn delete(&self, request: DeleteRequest, ctx: QueryContextRef) -> Result<AffectedRows>;

@@ -35,6 +35,7 @@ impl FunctionState {
use common_base::AffectedRows;
use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
use common_query::error::Result;
use common_query::Output;
use session::context::QueryContextRef;
use store_api::storage::RegionId;
use table::requests::{
@@ -70,8 +71,8 @@ impl FunctionState {
        &self,
        _request: InsertRequest,
        _ctx: QueryContextRef,
    ) -> Result<AffectedRows> {
        Ok(ROWS)
    ) -> Result<Output> {
        Ok(Output::new_with_affected_rows(ROWS))
    }

    async fn delete(
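The insert handler now returns `common_query::Output` instead of a bare row count, so an insert can eventually report more than `AffectedRows`. A simplified sketch of the wrapper idea; the real `Output` in GreptimeDB also covers other result kinds such as record batches, which this stub only hints at:

```rust
type AffectedRows = usize;

/// Stub of an output wrapper: today it only carries a row count, but the
/// enum leaves room for other result kinds (e.g. record batch streams).
#[derive(Debug)]
enum Output {
    AffectedRows(AffectedRows),
}

impl Output {
    fn new_with_affected_rows(rows: AffectedRows) -> Self {
        Output::AffectedRows(rows)
    }
}

fn main() {
    let out = Output::new_with_affected_rows(42);
    println!("{out:?}");
}
```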
@@ -56,6 +56,18 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
    } = &sig;
    let arg_types = ok!(extract_input_types(inputs));

    // with format like Float64Array
    let array_types = arg_types
        .iter()
        .map(|ty| {
            if let Type::Reference(TypeReference { elem, .. }) = ty {
                elem.as_ref().clone()
            } else {
                ty.clone()
            }
        })
        .collect::<Vec<_>>();

    // build the struct and its impl block
    // only do this when `display_name` is specified
    if let Ok(display_name) = get_ident(&arg_map, "display_name", arg_span) {
@@ -64,6 +76,8 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
        vis,
        ok!(get_ident(&arg_map, "name", arg_span)),
        display_name,
        array_types,
        ok!(get_ident(&arg_map, "ret", arg_span)),
    );
    result.extend(struct_code);
}
@@ -90,6 +104,8 @@ fn build_struct(
    vis: Visibility,
    name: Ident,
    display_name_ident: Ident,
    array_types: Vec<Type>,
    return_array_type: Ident,
) -> TokenStream {
    let display_name = display_name_ident.to_string();
    quote! {
@@ -114,18 +130,12 @@ fn build_struct(
        }
    }

    // TODO(ruihang): this should be parameterized
    // time index column and value column
    fn input_type() -> Vec<DataType> {
        vec![
            RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
            RangeArray::convert_data_type(DataType::Float64),
        ]
        vec![#( RangeArray::convert_data_type(#array_types::new_null(0).data_type().clone()), )*]
    }

    // TODO(ruihang): this should be parameterized
    fn return_type() -> DataType {
        DataType::Float64
        #return_array_type::new_null(0).data_type().clone()
    }
    }
}
@@ -160,6 +170,7 @@ fn build_calc_fn(
        .map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
        .collect::<Vec<_>>();
    let first_range_array_name = range_array_names.first().unwrap().clone();
    let first_param_name = param_names.first().unwrap().clone();

    quote! {
        impl #name {
@@ -168,13 +179,29 @@ fn build_calc_fn(

            #( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.to_data().into())?; )*

            // TODO(ruihang): add ensure!()
            // check arrays len
            {
                let len_first = #first_range_array_name.len();
                #(
                    if len_first != #range_array_names.len() {
                        return Err(DataFusionError::Execution(format!("RangeArray have different lengths in PromQL function {}: array1={}, array2={}", #name::name(), len_first, #range_array_names.len())));
                    }
                )*
            }

            let mut result_array = Vec::new();
            for index in 0..#first_range_array_name.len(){
                #( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*

                // TODO(ruihang): add ensure!() to check length
                // check element len
                {
                    let len_first = #first_param_name.len();
                    #(
                        if len_first != #param_names.len() {
                            return Err(DataFusionError::Execution(format!("RangeArray's element {} have different lengths in PromQL function {}: array1={}, array2={}", index, #name::name(), len_first, #param_names.len())));
                        }
                    )*
                }

                let result = #fn_name(#( &#param_names, )*);
                result_array.push(result);
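The generated code now guards against mismatched input lengths twice: once across the whole `RangeArray`s and once per element inside the loop. Stripped of the macro machinery and arrow types, the guard the expansion performs boils down to the following plain-function sketch (the error is stubbed as a `String` instead of `DataFusionError`):

```rust
// Every input array must match the first one's length; on mismatch the
// generated code returns an execution error and aborts the evaluation.
fn check_lengths(fn_name: &str, lens: &[usize]) -> Result<(), String> {
    let len_first = lens[0];
    for (i, len) in lens.iter().enumerate().skip(1) {
        if *len != len_first {
            return Err(format!(
                "RangeArray have different lengths in PromQL function {fn_name}: array1={len_first}, array{}={len}",
                i + 1
            ));
        }
    }
    Ok(())
}

fn main() {
    assert!(check_lengths("idelta", &[8, 8]).is_ok());
    assert!(check_lengths("idelta", &[8, 7]).is_err());
}
```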
@@ -14,14 +14,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use table::metadata::TableId;

use crate::error::Result;
use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoKey;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteKey;
use crate::key::TableMetaKey;
use crate::table_name::TableName;

/// KvBackend cache invalidator
#[async_trait::async_trait]
@@ -46,10 +44,7 @@ pub struct Context {

#[async_trait::async_trait]
pub trait CacheInvalidator: Send + Sync {
    // Invalidates table cache
    async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> Result<()>;

    async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> Result<()>;
    async fn invalidate(&self, ctx: &Context, caches: Vec<CacheIdent>) -> Result<()>;
}

pub type CacheInvalidatorRef = Arc<dyn CacheInvalidator>;
@@ -58,11 +53,7 @@ pub struct DummyCacheInvalidator;

#[async_trait::async_trait]
impl CacheInvalidator for DummyCacheInvalidator {
    async fn invalidate_table_id(&self, _ctx: &Context, _table_id: TableId) -> Result<()> {
        Ok(())
    }

    async fn invalidate_table_name(&self, _ctx: &Context, _table_name: TableName) -> Result<()> {
    async fn invalidate(&self, _ctx: &Context, _caches: Vec<CacheIdent>) -> Result<()> {
        Ok(())
    }
}
@@ -72,21 +63,22 @@ impl<T> CacheInvalidator for T
where
    T: KvCacheInvalidator,
{
    async fn invalidate_table_name(&self, _ctx: &Context, table_name: TableName) -> Result<()> {
        let key: TableNameKey = (&table_name).into();

        self.invalidate_key(&key.as_raw_key()).await;

        Ok(())
    }

    async fn invalidate_table_id(&self, _ctx: &Context, table_id: TableId) -> Result<()> {
        let key = TableInfoKey::new(table_id);
        self.invalidate_key(&key.as_raw_key()).await;

        let key = &TableRouteKey { table_id };
        self.invalidate_key(&key.as_raw_key()).await;
    async fn invalidate(&self, _ctx: &Context, caches: Vec<CacheIdent>) -> Result<()> {
        for cache in caches {
            match cache {
                CacheIdent::TableId(table_id) => {
                    let key = TableInfoKey::new(table_id);
                    self.invalidate_key(&key.as_raw_key()).await;

                    let key = &TableRouteKey { table_id };
                    self.invalidate_key(&key.as_raw_key()).await;
                }
                CacheIdent::TableName(table_name) => {
                    let key: TableNameKey = (&table_name).into();
                    self.invalidate_key(&key.as_raw_key()).await
                }
            }
        }
        Ok(())
    }
}
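The trait change collapses the two special-purpose methods into one `invalidate` that walks a list of `CacheIdent`s: callers can invalidate several cache entries in a single call, and a new identity kind only extends the enum rather than the trait. A synchronous, self-contained sketch of the dispatch; the key formats here are illustrative, not the real metadata key layout:

```rust
type TableId = u32;

enum CacheIdent {
    TableId(TableId),
    TableName(String), // the real variant holds a catalog/schema/table triple
}

struct KvCache;

impl KvCache {
    fn invalidate_key(&self, key: &str) {
        println!("invalidated {key}");
    }

    fn invalidate(&self, caches: Vec<CacheIdent>) {
        for cache in caches {
            match cache {
                // One table id maps to several cached keys (info and route).
                CacheIdent::TableId(id) => {
                    self.invalidate_key(&format!("table_info/{id}"));
                    self.invalidate_key(&format!("table_route/{id}"));
                }
                CacheIdent::TableName(name) => self.invalidate_key(&name),
            }
        }
    }
}

fn main() {
    KvCache.invalidate(vec![
        CacheIdent::TableId(1024),
        CacheIdent::TableName("greptime.public.metrics".into()),
    ]);
}
```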
@@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::Arc;

use api::v1::region::{QueryRequest, RegionRequest};
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
pub use common_base::AffectedRows;
use common_recordbatch::SendableRecordBatchStream;

@@ -25,7 +26,7 @@ use crate::peer::Peer;
#[async_trait::async_trait]
pub trait Datanode: Send + Sync {
    /// Handles DML, and DDL requests.
    async fn handle(&self, request: RegionRequest) -> Result<AffectedRows>;
    async fn handle(&self, request: RegionRequest) -> Result<HandleResponse>;

    /// Handles query requests
    async fn handle_query(&self, request: QueryRequest) -> Result<SendableRecordBatchStream>;
@@ -41,3 +42,27 @@ pub trait DatanodeManager: Send + Sync {
}

pub type DatanodeManagerRef = Arc<dyn DatanodeManager>;

/// This result struct is derived from [RegionResponse]
#[derive(Debug)]
pub struct HandleResponse {
    pub affected_rows: AffectedRows,
    pub extension: HashMap<String, Vec<u8>>,
}

impl HandleResponse {
    pub fn from_region_response(region_response: RegionResponse) -> Self {
        Self {
            affected_rows: region_response.affected_rows as _,
            extension: region_response.extension,
        }
    }

    /// Creates one response without extension
    pub fn new(affected_rows: AffectedRows) -> Self {
        Self {
            affected_rows,
            extension: Default::default(),
        }
    }
}
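`HandleResponse` widens the datanode `handle` contract: besides the affected row count it carries an open-ended `extension` map, so a region server can attach engine-specific payloads without another signature change. A minimal sketch of how a caller consumes it, standalone and with the RPC plumbing stubbed out:

```rust
use std::collections::HashMap;

type AffectedRows = usize;

#[derive(Debug)]
pub struct HandleResponse {
    pub affected_rows: AffectedRows,
    pub extension: HashMap<String, Vec<u8>>,
}

impl HandleResponse {
    /// Creates one response without extension.
    pub fn new(affected_rows: AffectedRows) -> Self {
        Self { affected_rows, extension: Default::default() }
    }
}

fn main() {
    // A handler that previously returned a bare row count now returns this
    // struct; existing callers just read `affected_rows`. The "engine" key
    // below is a hypothetical extension payload, not a real protocol field.
    let mut resp = HandleResponse::new(7);
    resp.extension.insert("engine".to_string(), b"mito".to_vec());
    println!("rows={}, extras={}", resp.affected_rows, resp.extension.len());
}
```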
@@ -43,6 +43,7 @@ use crate::cache_invalidator::Context;
use crate::ddl::utils::add_peer_context_if_needed;
use crate::ddl::DdlContext;
use crate::error::{self, ConvertAlterTableRequestSnafu, Error, InvalidProtoMsgSnafu, Result};
use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
@@ -333,11 +334,17 @@ impl AlterTableProcedure {

        if matches!(alter_kind, Kind::RenameTable { .. }) {
            cache_invalidator
                .invalidate_table_name(&Context::default(), self.data.table_ref().into())
                .invalidate(
                    &Context::default(),
                    vec![CacheIdent::TableName(self.data.table_ref().into())],
                )
                .await?;
        } else {
            cache_invalidator
                .invalidate_table_id(&Context::default(), self.data.table_id())
                .invalidate(
                    &Context::default(),
                    vec![CacheIdent::TableId(self.data.table_id())],
                )
                .await?;
        };
@@ -36,7 +36,7 @@ use crate::ddl::DdlContext;
use crate::error::{Result, TableAlreadyExistsSnafu};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{TableLock, TableNameLock};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
@@ -70,6 +70,7 @@ impl CreateLogicalTablesProcedure {
    /// - Checks whether physical table exists.
    /// - Checks whether logical tables exist.
    /// - Allocates the table ids.
    /// - Modify tasks to sort logical columns on their names.
    ///
    /// Abort(non-retry):
    /// - The physical table does not exist.
@@ -130,7 +131,7 @@ impl CreateLogicalTablesProcedure {
        ));
    }

    // Allocates table ids
    // Allocates table ids and sort columns on their names.
    for (task, table_id) in tasks.iter_mut().zip(already_exists_tables_ids.iter()) {
        let table_id = if let Some(table_id) = table_id {
            *table_id
@@ -141,6 +142,11 @@ impl CreateLogicalTablesProcedure {
            .await?
        };
        task.set_table_id(table_id);

        // sort columns in task
        task.sort_columns();

        common_telemetry::info!("[DEBUG] sorted task {:?}", task);
    }

    self.creator
@@ -307,8 +313,15 @@ impl Procedure for CreateLogicalTablesProcedure {
    }

    fn lock_key(&self) -> LockKey {
        let mut lock_key = Vec::with_capacity(1 + self.creator.data.tasks.len());
        // CatalogLock, SchemaLock,
        // TableLock
        // TableNameLock(s)
        let mut lock_key = Vec::with_capacity(2 + 1 + self.creator.data.tasks.len());
        let table_ref = self.creator.data.tasks[0].table_ref();
        lock_key.push(CatalogLock::Read(table_ref.catalog).into());
        lock_key.push(SchemaLock::read(table_ref.catalog, table_ref.schema).into());
        lock_key.push(TableLock::Write(self.creator.data.physical_table_id()).into());

        for task in &self.creator.data.tasks {
            lock_key.push(
                TableNameLock::new(
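The new `task.sort_columns()` step normalizes each logical table by ordering its columns by name before metadata is written (note the `[DEBUG]` info log the commit leaves behind). The sketch below is hypothetical and only illustrates the idea; the real `CreateTableTask::sort_columns` must also keep primary-key and time-index positions consistent with the reordered columns:

```rust
#[derive(Debug)]
struct ColumnSchema {
    name: String,
}

// Hypothetical helper: order columns by name so equivalent logical tables
// produce identical, deterministic metadata.
fn sort_columns(columns: &mut [ColumnSchema]) {
    columns.sort_by(|a, b| a.name.cmp(&b.name));
}

fn main() {
    let mut cols = vec![
        ColumnSchema { name: "host".into() },
        ColumnSchema { name: "cpu".into() },
        ColumnSchema { name: "ts".into() },
    ];
    sort_columns(&mut cols);
    assert_eq!(cols.first().unwrap().name, "cpu");
}
```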
@@ -38,7 +38,7 @@ use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result, TableRouteNotFoundSnafu};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::lock_key::TableNameLock;
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{
@@ -343,11 +343,11 @@ impl Procedure for CreateTableProcedure {
    fn lock_key(&self) -> LockKey {
        let table_ref = &self.creator.data.table_ref();

        LockKey::single(TableNameLock::new(
            table_ref.catalog,
            table_ref.schema,
            table_ref.table,
        ))
        LockKey::new(vec![
            CatalogLock::Read(table_ref.catalog).into(),
            SchemaLock::read(table_ref.catalog, table_ref.schema).into(),
            TableNameLock::new(table_ref.catalog, table_ref.schema, table_ref.table).into(),
        ])
    }
}
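Both create procedures now take read locks on the catalog and schema before the table-level lock, so a concurrent schema drop, for example, cannot race a table creation underneath it. A sketch of the hierarchy as plain data, under the assumption (from the diff) that coarser scopes are read-locked while the table name itself is the contended key; the real `LockKey` encodes these as ordered string keys for the procedure runner:

```rust
#[derive(Debug)]
enum Lock {
    CatalogRead(String),
    SchemaRead(String),
    TableName(String),
}

// Lock order goes from coarse to fine: catalog, then schema, then table.
fn create_table_lock_key(catalog: &str, schema: &str, table: &str) -> Vec<Lock> {
    vec![
        Lock::CatalogRead(catalog.to_string()),
        Lock::SchemaRead(format!("{catalog}/{schema}")),
        Lock::TableName(format!("{catalog}/{schema}/{table}")),
    ]
}

fn main() {
    for lock in create_table_lock_key("greptime", "public", "metrics") {
        println!("{lock:?}");
    }
}
```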
@@ -12,42 +12,32 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use api::v1::region::{
    region_request, DropRequest as PbDropRegionRequest, RegionRequest, RegionRequestHeader,
};
pub mod executor;

use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
use common_procedure::{
    Context as ProcedureContext, LockKey, Procedure, Result as ProcedureResult, Status,
};
use common_telemetry::tracing_context::TracingContext;
use common_telemetry::{debug, info};
use futures::future::join_all;
use common_telemetry::info;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use snafu::{OptionExt, ResultExt};
use strum::AsRefStr;
use table::metadata::{RawTableInfo, TableId};
use table::table_reference::TableReference;

use self::executor::DropTableExecutor;
use super::utils::handle_retry_error;
use crate::cache_invalidator::Context;
use crate::ddl::utils::add_peer_context_if_needed;
use crate::ddl::DdlContext;
use crate::error::{self, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::DropTableTask;
use crate::rpc::router::{
    find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
};
use crate::rpc::router::{operating_leader_regions, RegionRoute};

pub struct DropTableProcedure {
    /// The context of procedure runtime.
@@ -58,7 +48,6 @@ pub struct DropTableProcedure {
    pub dropping_regions: Vec<OperatingRegionGuard>,
}

#[allow(dead_code)]
impl DropTableProcedure {
    pub const TYPE_NAME: &'static str = "metasrv-procedure::DropTable";

@@ -85,31 +74,10 @@ impl DropTableProcedure {
        })
    }

    async fn on_prepare(&mut self) -> Result<Status> {
        let table_ref = &self.data.table_ref();

        let exist = self
            .context
            .table_metadata_manager
            .table_name_manager()
            .exists(TableNameKey::new(
                table_ref.catalog,
                table_ref.schema,
                table_ref.table,
            ))
            .await?;

        if !exist && self.data.task.drop_if_exists {
    async fn on_prepare<'a>(&mut self, executor: &DropTableExecutor) -> Result<Status> {
        if executor.on_prepare(&self.context).await?.stop() {
            return Ok(Status::done());
        }

        ensure!(
            exist,
            error::TableNotFoundSnafu {
                table_name: table_ref.to_string()
            }
        );

        self.data.state = DropTableState::RemoveMetadata;

        Ok(Status::executing(true))
@@ -144,98 +112,38 @@ impl DropTableProcedure {
    }

    /// Removes the table metadata.
    async fn on_remove_metadata(&mut self) -> Result<Status> {
    async fn on_remove_metadata(&mut self, executor: &DropTableExecutor) -> Result<Status> {
        self.register_dropping_regions()?;
        // NOTES: If the meta server is crashed after the `RemoveMetadata`,
        // Corresponding regions of this table on the Datanode will be closed automatically.
        // Then any future dropping operation will fail.

        // TODO(weny): Considers introducing a RegionStatus to indicate the region is dropping.

        let table_metadata_manager = &self.context.table_metadata_manager;
        let table_info_value = &self.data.table_info_value;
        let table_route_value = &self.data.table_route_value;
        let table_id = self.data.table_id();

        table_metadata_manager
            .delete_table_metadata(table_info_value, table_route_value)
        executor
            .on_remove_metadata(
                &self.context,
                &self.data.table_info_value,
                &self.data.table_route_value,
            )
            .await?;

        info!("Deleted table metadata for table {table_id}");

        self.data.state = DropTableState::InvalidateTableCache;

        Ok(Status::executing(true))
    }

    /// Broadcasts invalidate table cache instruction.
    async fn on_broadcast(&mut self) -> Result<Status> {
        let ctx = Context {
            subject: Some("Invalidate table cache by dropping table".to_string()),
        };

        let cache_invalidator = &self.context.cache_invalidator;

        cache_invalidator
            .invalidate_table_name(&ctx, self.data.table_ref().into())
            .await?;

        cache_invalidator
            .invalidate_table_id(&ctx, self.data.table_id())
            .await?;

    async fn on_broadcast(&mut self, executor: &DropTableExecutor) -> Result<Status> {
        executor.invalidate_table_cache(&self.context).await?;
        self.data.state = DropTableState::DatanodeDropRegions;

        Ok(Status::executing(true))
    }

    pub async fn on_datanode_drop_regions(&self) -> Result<Status> {
        let table_id = self.data.table_id();

        let region_routes = &self.data.region_routes()?;
        let leaders = find_leaders(region_routes);
        let mut drop_region_tasks = Vec::with_capacity(leaders.len());

        for datanode in leaders {
            let requester = self.context.datanode_manager.datanode(&datanode).await;

            let regions = find_leader_regions(region_routes, &datanode);
            let region_ids = regions
                .iter()
                .map(|region_number| RegionId::new(table_id, *region_number))
                .collect::<Vec<_>>();

            for region_id in region_ids {
                debug!("Dropping region {region_id} on Datanode {datanode:?}");

                let request = RegionRequest {
                    header: Some(RegionRequestHeader {
                        tracing_context: TracingContext::from_current_span().to_w3c(),
                        ..Default::default()
                    }),
                    body: Some(region_request::Body::Drop(PbDropRegionRequest {
                        region_id: region_id.as_u64(),
                    })),
                };

                let datanode = datanode.clone();
                let requester = requester.clone();

                drop_region_tasks.push(async move {
                    if let Err(err) = requester.handle(request).await {
                        if err.status_code() != StatusCode::RegionNotFound {
                            return Err(add_peer_context_if_needed(datanode)(err));
                        }
                    }
                    Ok(())
                });
            }
        }

        join_all(drop_region_tasks)
            .await
            .into_iter()
            .collect::<Result<Vec<_>>>()?;

    pub async fn on_datanode_drop_regions(&self, executor: &DropTableExecutor) -> Result<Status> {
        executor
            .on_drop_regions(&self.context, &self.data.table_route_value)
            .await?;
        Ok(Status::done())
    }
}
@@ -247,17 +155,21 @@ impl Procedure for DropTableProcedure {
    }

    async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
        let executor = DropTableExecutor::new(
            self.data.task.table_name(),
            self.data.table_id(),
            self.data.task.drop_if_exists,
        );
        let state = &self.data.state;

        let _timer = metrics::METRIC_META_PROCEDURE_DROP_TABLE
            .with_label_values(&[state.as_ref()])
            .start_timer();

        match self.data.state {
            DropTableState::Prepare => self.on_prepare().await,
            DropTableState::RemoveMetadata => self.on_remove_metadata().await,
            DropTableState::InvalidateTableCache => self.on_broadcast().await,
            DropTableState::DatanodeDropRegions => self.on_datanode_drop_regions().await,
            DropTableState::Prepare => self.on_prepare(&executor).await,
            DropTableState::RemoveMetadata => self.on_remove_metadata(&executor).await,
            DropTableState::InvalidateTableCache => self.on_broadcast(&executor).await,
            DropTableState::DatanodeDropRegions => self.on_datanode_drop_regions(&executor).await,
        }
        .map_err(handle_retry_error)
    }
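The refactor moves all side effects into a reusable `DropTableExecutor` (defined in the new file below) and threads it through each state; `on_prepare` now signals an early, successful exit through a small `Control` type instead of encoding it in `Status` directly. A sketch of that control-flow pattern, with names mirroring the diff and the bodies stubbed:

```rust
enum Control<T> {
    Continue(T),
    Stop,
}

impl<T> Control<T> {
    fn stop(&self) -> bool {
        matches!(self, Control::Stop)
    }
}

// `DROP TABLE IF EXISTS` on a missing table is a no-op, not an error.
fn prepare(table_exists: bool, drop_if_exists: bool) -> Result<Control<()>, String> {
    if !table_exists && drop_if_exists {
        return Ok(Control::Stop);
    }
    if !table_exists {
        return Err("table not found".to_string());
    }
    Ok(Control::Continue(()))
}

fn main() {
    assert!(prepare(false, true).unwrap().stop());   // silently done
    assert!(!prepare(true, false).unwrap().stop());  // proceed to next state
    assert!(prepare(false, false).is_err());         // hard error
}
```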
280 src/common/meta/src/ddl/drop_table/executor.rs Normal file
@@ -0,0 +1,280 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use api::v1::region::{
    region_request, DropRequest as PbDropRegionRequest, RegionRequest, RegionRequestHeader,
};
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_telemetry::debug;
use common_telemetry::tracing_context::TracingContext;
use futures::future::join_all;
use snafu::ensure;
use store_api::storage::RegionId;
use table::metadata::TableId;

use crate::cache_invalidator::Context;
use crate::ddl::utils::add_peer_context_if_needed;
use crate::ddl::DdlContext;
use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::rpc::router::{find_leader_regions, find_leaders};
use crate::table_name::TableName;

/// [Control] indicates to the caller whether to proceed to the next step.
#[derive(Debug)]
pub enum Control<T> {
    Continue(T),
    Stop,
}

impl<T> Control<T> {
    /// Returns true if it's [Control::Stop].
    pub fn stop(&self) -> bool {
        matches!(self, Control::Stop)
    }
}

impl DropTableExecutor {
    /// Creates a new [DropTableExecutor].
    pub fn new(table: TableName, table_id: TableId, drop_if_exists: bool) -> Self {
        Self {
            table,
            table_id,
            drop_if_exists,
        }
    }
}

/// [DropTableExecutor] performs:
/// - Drops the metadata of the table.
/// - Invalidates the cache on the Frontend nodes.
/// - Drops the regions on the Datanode nodes.
pub struct DropTableExecutor {
    table: TableName,
    table_id: TableId,
    drop_if_exists: bool,
}

impl DropTableExecutor {
    /// Checks whether the table exists.
    /// - Returns early if the table does not exist and `drop_if_exists` is `true`.
    /// - Returns an error if the table does not exist and `drop_if_exists` is `false`.
    pub async fn on_prepare(&self, ctx: &DdlContext) -> Result<Control<()>> {
        let table_ref = self.table.table_ref();

        let exist = ctx
            .table_metadata_manager
            .table_name_manager()
            .exists(TableNameKey::new(
                table_ref.catalog,
                table_ref.schema,
                table_ref.table,
            ))
            .await?;

        if !exist && self.drop_if_exists {
            return Ok(Control::Stop);
        }

        ensure!(
            exist,
            error::TableNotFoundSnafu {
                table_name: table_ref.to_string()
            }
        );

        Ok(Control::Continue(()))
    }

    /// Removes the table metadata.
    pub async fn on_remove_metadata(
        &self,
        ctx: &DdlContext,
        table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
        table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
    ) -> Result<()> {
        ctx.table_metadata_manager
            .delete_table_metadata(table_info_value, table_route_value)
            .await
    }

    /// Invalidates the frontend caches.
    pub async fn invalidate_table_cache(&self, ctx: &DdlContext) -> Result<()> {
        let cache_invalidator = &ctx.cache_invalidator;
        let ctx = Context {
            subject: Some("Invalidate table cache by dropping table".to_string()),
        };

        cache_invalidator
            .invalidate(
                &ctx,
                vec![
                    CacheIdent::TableName(self.table.table_ref().into()),
                    CacheIdent::TableId(self.table_id),
                ],
            )
            .await?;

        Ok(())
    }

    /// Drops the regions on the datanodes.
    pub async fn on_drop_regions(
        &self,
        ctx: &DdlContext,
        table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
    ) -> Result<()> {
        // The `table_route_value` is always the physical table route.
        let region_routes = table_route_value.region_routes()?;
        let leaders = find_leaders(region_routes);
        let mut drop_region_tasks = Vec::with_capacity(leaders.len());
        let table_id = self.table_id;

        for datanode in leaders {
            let requester = ctx.datanode_manager.datanode(&datanode).await;
            let regions = find_leader_regions(region_routes, &datanode);
            let region_ids = regions
                .iter()
                .map(|region_number| RegionId::new(table_id, *region_number))
                .collect::<Vec<_>>();

            for region_id in region_ids {
                debug!("Dropping region {region_id} on Datanode {datanode:?}");
                let request = RegionRequest {
                    header: Some(RegionRequestHeader {
                        tracing_context: TracingContext::from_current_span().to_w3c(),
                        ..Default::default()
                    }),
                    body: Some(region_request::Body::Drop(PbDropRegionRequest {
                        region_id: region_id.as_u64(),
                    })),
                };
                let datanode = datanode.clone();
                let requester = requester.clone();
                drop_region_tasks.push(async move {
                    if let Err(err) = requester.handle(request).await {
                        if err.status_code() != StatusCode::RegionNotFound {
                            return Err(add_peer_context_if_needed(datanode)(err));
                        }
                    }
                    Ok(())
                });
            }
        }

        join_all(drop_region_tasks)
            .await
            .into_iter()
            .collect::<Result<Vec<_>>>()?;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;
    use std::collections::HashMap;
    use std::sync::Arc;

    use api::v1::{ColumnDataType, SemanticType};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use table::metadata::RawTableInfo;

    use super::*;
    use crate::ddl::test_util::create_table::build_raw_table_info_from_expr;
    use crate::ddl::test_util::{TestColumnDefBuilder, TestCreateTableExprBuilder};
    use crate::table_name::TableName;
    use crate::test_util::{new_ddl_context, MockDatanodeManager};

    fn test_create_raw_table_info(name: &str) -> RawTableInfo {
        let create_table = TestCreateTableExprBuilder::default()
            .column_defs([
                TestColumnDefBuilder::default()
                    .name("ts")
                    .data_type(ColumnDataType::TimestampMillisecond)
                    .semantic_type(SemanticType::Timestamp)
                    .build()
                    .unwrap()
                    .into(),
                TestColumnDefBuilder::default()
                    .name("host")
                    .data_type(ColumnDataType::String)
                    .semantic_type(SemanticType::Tag)
                    .build()
                    .unwrap()
                    .into(),
                TestColumnDefBuilder::default()
                    .name("cpu")
                    .data_type(ColumnDataType::Float64)
                    .semantic_type(SemanticType::Field)
                    .build()
                    .unwrap()
                    .into(),
            ])
            .time_index("ts")
            .primary_keys(["host".into()])
            .table_name(name)
            .build()
            .unwrap()
            .into();
        build_raw_table_info_from_expr(&create_table)
    }

    #[tokio::test]
    async fn test_on_prepare() {
        // Drops if exists
        let datanode_manager = Arc::new(MockDatanodeManager::new(()));
        let ctx = new_ddl_context(datanode_manager);
        let executor = DropTableExecutor::new(
            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
            1024,
            true,
        );
        let ctrl = executor.on_prepare(&ctx).await.unwrap();
        assert!(ctrl.stop());

        // Drops a non-existent table
        let executor = DropTableExecutor::new(
            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
            1024,
            false,
        );
        let err = executor.on_prepare(&ctx).await.unwrap_err();
        assert_matches!(err, error::Error::TableNotFound { .. });

        // Drops an existing table
        let executor = DropTableExecutor::new(
            TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
            1024,
            false,
        );
        let raw_table_info = test_create_raw_table_info("my_table");
        ctx.table_metadata_manager
            .create_table_metadata(
                raw_table_info,
                TableRouteValue::physical(vec![]),
                HashMap::new(),
            )
            .await
            .unwrap();
        let ctrl = executor.on_prepare(&ctx).await.unwrap();
        assert!(!ctrl.stop());
    }
}
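A minimal sketch of how a caller can drive the executor through `Control` (the wrapper function and `Status` plumbing are assumed for illustration; only `DropTableExecutor`, `Control`, and `DdlContext` come from the diff above):

// Hypothetical glue code, not part of this change set.
async fn prepare_step(executor: &DropTableExecutor, ctx: &DdlContext) -> Result<Status> {
    // on_prepare returns Control::Stop when the table is absent and
    // drop_if_exists is true, so the whole procedure can finish early.
    if executor.on_prepare(ctx).await?.stop() {
        return Ok(Status::done());
    }
    // Otherwise move on to the next procedure state (constructor assumed).
    Ok(Status::executing(true))
}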
@@ -28,6 +28,7 @@ use common_telemetry::debug;
use store_api::storage::RegionId;
use table::metadata::RawTableInfo;

+use crate::datanode_manager::HandleResponse;
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
use crate::ddl::test_util::create_table::build_raw_table_info_from_expr;
use crate::ddl::test_util::{TestColumnDefBuilder, TestCreateTableExprBuilder};
@@ -36,7 +37,7 @@ use crate::error::{Error, Result};
use crate::key::table_route::TableRouteValue;
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
-use crate::test_util::{new_ddl_context, AffectedRows, MockDatanodeHandler, MockDatanodeManager};
+use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};

// Note: this code may be duplicated with others.
// However, this is by design; it keeps the tests easy to modify and extend.
@@ -332,9 +333,9 @@ pub struct NaiveDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for NaiveDatanodeHandler {
-    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
-        Ok(0)
+        Ok(HandleResponse::new(0))
    }

    async fn handle_query(
@@ -26,6 +26,7 @@ use common_procedure_test::MockContextProvider;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;

+use crate::datanode_manager::HandleResponse;
use crate::ddl::create_table::CreateTableProcedure;
use crate::ddl::test_util::create_table::build_raw_table_info_from_expr;
use crate::ddl::test_util::{TestColumnDefBuilder, TestCreateTableExprBuilder};
@@ -34,11 +35,11 @@ use crate::error::{Error, Result};
use crate::key::table_route::TableRouteValue;
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
-use crate::test_util::{new_ddl_context, AffectedRows, MockDatanodeHandler, MockDatanodeManager};
+use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager};

#[async_trait::async_trait]
impl MockDatanodeHandler for () {
-    async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<HandleResponse> {
        unreachable!()
    }

@@ -176,7 +177,7 @@ pub struct RetryErrorDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for RetryErrorDatanodeHandler {
-    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning retry later for request: {request:?}, peer: {peer:?}");
        Err(Error::RetryLater {
            source: BoxedError::new(
@@ -220,7 +221,7 @@ pub struct UnexpectedErrorDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for UnexpectedErrorDatanodeHandler {
-    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning mock error for request: {request:?}, peer: {peer:?}");
        error::UnexpectedSnafu {
            err_msg: "mock error",
@@ -260,9 +261,9 @@ pub struct NaiveDatanodeHandler;

#[async_trait::async_trait]
impl MockDatanodeHandler for NaiveDatanodeHandler {
-    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse> {
        debug!("Returning Ok(0) for request: {request:?}, peer: {peer:?}");
-        Ok(0)
+        Ok(HandleResponse::new(0))
    }

    async fn handle_query(
@@ -124,7 +124,7 @@ impl OpenRegion {
}

/// The instruction of downgrading leader region.
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DowngradeRegion {
    /// The [RegionId].
    pub region_id: RegionId,
@@ -137,7 +137,7 @@ impl Display for DowngradeRegion {
}

/// Upgrades a follower region to leader region.
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct UpgradeRegion {
    /// The [RegionId].
    pub region_id: RegionId,
@@ -151,7 +151,14 @@ pub struct UpgradeRegion {
    pub wait_for_replay_timeout: Option<Duration>,
}

-#[derive(Debug, Clone, Serialize, Deserialize, Display)]
+#[derive(Debug, Clone, Serialize, Deserialize, Display, PartialEq, Eq)]
+/// The identifier of cache.
+pub enum CacheIdent {
+    TableId(TableId),
+    TableName(TableName),
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Display, PartialEq)]
pub enum Instruction {
    /// Opens a region.
    ///
@@ -165,10 +172,8 @@ pub enum Instruction {
    UpgradeRegion(UpgradeRegion),
    /// Downgrades a region.
    DowngradeRegion(DowngradeRegion),
-    /// Invalidates a specified table cache.
-    InvalidateTableIdCache(TableId),
-    /// Invalidates a specified table name index cache.
-    InvalidateTableNameCache(TableName),
+    /// Invalidates batch cache.
+    InvalidateCaches(Vec<CacheIdent>),
}

/// The reply of [UpgradeRegion].
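With `CacheIdent` in place, one heartbeat instruction can invalidate several cache entries at once. A small construction sketch (the id and name values are made up):

let idents = vec![
    CacheIdent::TableId(1024),
    CacheIdent::TableName(TableName::new("greptime", "public", "my_table")),
];
let instruction = Instruction::InvalidateCaches(idents);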
@@ -90,13 +90,13 @@ use crate::kv_backend::KvBackendRef;
use crate::rpc::router::{region_distribution, RegionRoute, RegionStatus};
use crate::DatanodeId;

-pub const REMOVED_PREFIX: &str = "__removed";
-
pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.]*";
pub const MAINTENANCE_KEY: &str = "maintenance";

const DATANODE_TABLE_KEY_PREFIX: &str = "__dn_table";
const TABLE_REGION_KEY_PREFIX: &str = "__table_region";

+pub const REMOVED_PREFIX: &str = "__removed";
pub const TABLE_INFO_KEY_PREFIX: &str = "__table_info";
pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
@@ -140,10 +140,6 @@ lazy_static! {
        .unwrap();
}

-pub fn to_removed_key(key: &str) -> String {
-    format!("{REMOVED_PREFIX}-{key}")
-}
-
pub trait TableMetaKey {
    fn as_raw_key(&self) -> Vec<u8>;
}
@@ -565,14 +561,10 @@ impl TableMetadataManager {
            &table_info.name,
        );

-        let delete_table_name_txn = self
-            .table_name_manager()
-            .build_delete_txn(&table_name, table_id)?;
+        let delete_table_name_txn = self.table_name_manager().build_delete_txn(&table_name)?;

        // Deletes table info.
-        let delete_table_info_txn = self
-            .table_info_manager()
-            .build_delete_txn(table_id, table_info_value)?;
+        let delete_table_info_txn = self.table_info_manager().build_delete_txn(table_id)?;

        // Deletes datanode table key value pairs.
        let distribution = region_distribution(table_route_value.region_routes()?);
@@ -584,7 +576,7 @@ impl TableMetadataManager {
        let delete_table_route_txn = self
            .table_route_manager()
            .table_route_storage()
-            .build_delete_txn(table_id, table_route_value)?;
+            .build_delete_txn(table_id)?;

        let txn = Txn::merge_all(vec![
            delete_table_name_txn,
@@ -871,7 +863,7 @@ mod tests {
    use crate::key::table_info::TableInfoValue;
    use crate::key::table_name::TableNameKey;
    use crate::key::table_route::TableRouteValue;
-    use crate::key::{to_removed_key, DeserializedValueWithBytes, TableMetadataManager};
+    use crate::key::{DeserializedValueWithBytes, TableMetadataManager};
    use crate::kv_backend::memory::MemoryKvBackend;
    use crate::peer::Peer;
    use crate::rpc::router::{region_distribution, Region, RegionRoute, RegionStatus};
@@ -904,13 +896,6 @@ mod tests {
        assert_eq!(decoded.bytes, expected);
    }

-    #[test]
-    fn test_to_removed_key() {
-        let key = "test_key";
-        let removed = "__removed-test_key";
-        assert_eq!(removed, to_removed_key(key));
-    }
-
    fn new_test_region_route() -> RegionRoute {
        new_region_route(1, 2)
    }
@@ -1148,24 +1133,20 @@ mod tests {
            .unwrap()
            .is_empty());
        // Checks removed values
-        let removed_table_info = table_metadata_manager
+        let table_info = table_metadata_manager
            .table_info_manager()
-            .get_removed(table_id)
+            .get(table_id)
            .await
-            .unwrap()
-            .unwrap()
-            .into_inner();
-        assert_eq!(removed_table_info.table_info, table_info);
+            .unwrap();
+        assert!(table_info.is_none());

-        let removed_table_route = table_metadata_manager
+        let table_route = table_metadata_manager
            .table_route_manager()
            .table_route_storage()
-            .get_raw_removed(table_id)
+            .get(table_id)
            .await
-            .unwrap()
-            .unwrap()
-            .into_inner();
-        assert_eq!(removed_table_route.region_routes().unwrap(), region_routes);
+            .unwrap();
+        assert!(table_route.is_none());
    }

    #[tokio::test]
@@ -20,7 +20,7 @@ use table::table_reference::TableReference;

use super::{txn_helper, DeserializedValueWithBytes, TableMetaValue, TABLE_INFO_KEY_PREFIX};
use crate::error::Result;
-use crate::key::{to_removed_key, TableMetaKey};
+use crate::key::TableMetaKey;
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::BatchGetRequest;
@@ -157,38 +157,15 @@ impl TableInfoManager {
    }

    /// Builds a delete table info transaction.
-    pub(crate) fn build_delete_txn(
-        &self,
-        table_id: TableId,
-        table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
-    ) -> Result<Txn> {
+    pub(crate) fn build_delete_txn(&self, table_id: TableId) -> Result<Txn> {
        let key = TableInfoKey::new(table_id);
        let raw_key = key.as_raw_key();
-        let raw_value = table_info_value.get_raw_bytes();
-        let removed_key = to_removed_key(&String::from_utf8_lossy(&raw_key));
-
-        let txn = Txn::new().and_then(vec![
-            TxnOp::Delete(raw_key),
-            TxnOp::Put(removed_key.into_bytes(), raw_value),
-        ]);
+        let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);

        Ok(txn)
    }

-    #[cfg(test)]
-    pub async fn get_removed(
-        &self,
-        table_id: TableId,
-    ) -> Result<Option<DeserializedValueWithBytes<TableInfoValue>>> {
-        let key = TableInfoKey::new(table_id).to_string();
-        let removed_key = to_removed_key(&key).into_bytes();
-        self.kv_backend
-            .get(&removed_key)
-            .await?
-            .map(|x| DeserializedValueWithBytes::from_inner_slice(&x.value))
-            .transpose()
-    }
-
    pub async fn get(
        &self,
        table_id: TableId,
@@ -22,7 +22,7 @@ use table::metadata::TableId;

use super::{TableMetaValue, TABLE_NAME_KEY_PATTERN, TABLE_NAME_KEY_PREFIX};
use crate::error::{Error, InvalidTableMetadataSnafu, Result};
-use crate::key::{to_removed_key, TableMetaKey};
+use crate::key::TableMetaKey;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
@@ -195,20 +195,9 @@ impl TableNameManager {
    }

    /// Builds a delete table name transaction. It only executes if the primary-key comparison succeeds.
-    pub(crate) fn build_delete_txn(
-        &self,
-        key: &TableNameKey<'_>,
-        table_id: TableId,
-    ) -> Result<Txn> {
+    pub(crate) fn build_delete_txn(&self, key: &TableNameKey<'_>) -> Result<Txn> {
        let raw_key = key.as_raw_key();
-        let value = TableNameValue::new(table_id);
-        let raw_value = value.try_as_raw_value()?;
-        let removed_key = to_removed_key(&String::from_utf8_lossy(&raw_key));
-
-        let txn = Txn::new().and_then(vec![
-            TxnOp::Delete(raw_key),
-            TxnOp::Put(removed_key.into_bytes(), raw_value),
-        ]);
+        let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);

        Ok(txn)
    }
@@ -25,7 +25,7 @@ use crate::error::{
    self, MetadataCorruptionSnafu, Result, SerdeJsonSnafu, TableRouteNotFoundSnafu,
    UnexpectedLogicalRouteTableSnafu,
};
-use crate::key::{to_removed_key, RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
+use crate::key::{RegionDistribution, TableMetaKey, TABLE_ROUTE_PREFIX};
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse};
use crate::kv_backend::KvBackendRef;
use crate::rpc::router::{region_distribution, RegionRoute};
@@ -485,38 +485,15 @@ impl TableRouteStorage {

    /// Builds a delete table route transaction,
-    /// it expected the remote value equals the `table_route_value`.
-    pub(crate) fn build_delete_txn(
-        &self,
-        table_id: TableId,
-        table_route_value: &DeserializedValueWithBytes<TableRouteValue>,
-    ) -> Result<Txn> {
+    pub(crate) fn build_delete_txn(&self, table_id: TableId) -> Result<Txn> {
        let key = TableRouteKey::new(table_id);
        let raw_key = key.as_raw_key();
-        let raw_value = table_route_value.get_raw_bytes();
-        let removed_key = to_removed_key(&String::from_utf8_lossy(&raw_key));
-
-        let txn = Txn::new().and_then(vec![
-            TxnOp::Delete(raw_key),
-            TxnOp::Put(removed_key.into_bytes(), raw_value),
-        ]);
+        let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);

        Ok(txn)
    }

-    #[cfg(test)]
-    pub async fn get_raw_removed(
-        &self,
-        table_id: TableId,
-    ) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
-        let key = TableRouteKey::new(table_id).to_string();
-        let removed_key = to_removed_key(&key).into_bytes();
-        self.kv_backend
-            .get(&removed_key)
-            .await?
-            .map(|x| DeserializedValueWithBytes::from_inner_slice(&x.value))
-            .transpose()
-    }
-
    /// Returns the [`TableRouteValue`].
    pub async fn get(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
        let key = TableRouteKey::new(table_id);
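The three `build_delete_txn` rewrites above share one idea: dropping metadata no longer writes a `__removed-`-prefixed tombstone copy alongside the delete. A before/after sketch of the transaction each helper builds (bindings as in the hunks above):

// Before: delete the key and keep a tombstone copy under the "__removed" prefix.
let txn = Txn::new().and_then(vec![
    TxnOp::Delete(raw_key.clone()),
    TxnOp::Put(
        to_removed_key(&String::from_utf8_lossy(&raw_key)).into_bytes(),
        raw_value,
    ),
]);

// After: a plain delete; dropped metadata is gone for good.
let txn = Txn::new().and_then(vec![TxnOp::Delete(raw_key)]);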
@@ -22,7 +22,7 @@ use api::v1::meta::{
    DropTableTask as PbDropTableTask, DropTableTasks as PbDropTableTasks, Partition, ProcedureId,
    TruncateTableTask as PbTruncateTableTask,
};
-use api::v1::{AlterExpr, CreateTableExpr, DropTableExpr, TruncateTableExpr};
+use api::v1::{AlterExpr, CreateTableExpr, DropTableExpr, SemanticType, TruncateTableExpr};
use base64::engine::general_purpose;
use base64::Engine as _;
use prost::Message;
@@ -368,6 +368,44 @@ impl CreateTableTask {
    pub fn set_table_id(&mut self, table_id: TableId) {
        self.table_info.ident.table_id = table_id;
    }
+
+    /// Sorts the columns in [CreateTableExpr] and [RawTableInfo].
+    ///
+    /// This function won't do any check or verification. The caller should
+    /// ensure this task is valid.
+    pub fn sort_columns(&mut self) {
+        // sort create table expr
+        // sort column_defs by name
+        self.create_table
+            .column_defs
+            .sort_unstable_by(|a, b| a.name.cmp(&b.name));
+
+        // compute new indices of sorted columns
+        // this part won't do any check or verification.
+        let mut primary_key_indices = Vec::with_capacity(self.create_table.primary_keys.len());
+        let mut value_indices =
+            Vec::with_capacity(self.create_table.column_defs.len() - primary_key_indices.len() - 1);
+        let mut timestamp_index = None;
+        for (index, col) in self.create_table.column_defs.iter().enumerate() {
+            if self.create_table.primary_keys.contains(&col.name) {
+                primary_key_indices.push(index);
+            } else if col.semantic_type == SemanticType::Timestamp as i32 {
+                timestamp_index = Some(index);
+            } else {
+                value_indices.push(index);
+            }
+        }
+
+        // overwrite table info
+        self.table_info
+            .meta
+            .schema
+            .column_schemas
+            .sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        self.table_info.meta.schema.timestamp_index = timestamp_index;
+        self.table_info.meta.primary_key_indices = primary_key_indices;
+        self.table_info.meta.value_indices = value_indices;
+    }
}

impl Serialize for CreateTableTask {
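To make the index bookkeeping in `sort_columns` concrete, take three columns named column3 (tag), column1 (timestamp), column2 (field), as in the test further below. Sorting by name yields [column1, column2, column3], so the indices are remapped as follows:

// timestamp_index:     Some(1) -> Some(0)  (column1 now sits at slot 0)
// primary_key_indices: [0]     -> [2]      (column3 now sits at slot 2)
// value_indices:       [2]     -> [1]      (column2 now sits at slot 1)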
@@ -555,9 +593,11 @@ impl TryFrom<TruncateTableTask> for PbTruncateTableTask {
mod tests {
    use std::sync::Arc;

-    use api::v1::{AlterExpr, CreateTableExpr};
-    use datatypes::schema::SchemaBuilder;
-    use table::metadata::RawTableInfo;
+    use api::v1::{AlterExpr, ColumnDef, CreateTableExpr, SemanticType};
+    use datatypes::schema::{ColumnSchema, RawSchema, SchemaBuilder};
+    use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
+    use store_api::storage::ConcreteDataType;
+    use table::metadata::{RawTableInfo, RawTableMeta, TableType};
    use table::test_util::table_info::test_table_info;

    use super::{AlterTableTask, CreateTableTask};
@@ -589,4 +629,108 @@ mod tests {
        let de = serde_json::from_slice(&output).unwrap();
        assert_eq!(task, de);
    }
+
+    #[test]
+    fn test_sort_columns() {
+        // construct RawSchema
+        let raw_schema = RawSchema {
+            column_schemas: vec![
+                ColumnSchema::new(
+                    "column3".to_string(),
+                    ConcreteDataType::string_datatype(),
+                    true,
+                ),
+                ColumnSchema::new(
+                    "column1".to_string(),
+                    ConcreteDataType::timestamp_millisecond_datatype(),
+                    false,
+                ),
+                ColumnSchema::new(
+                    "column2".to_string(),
+                    ConcreteDataType::float64_datatype(),
+                    true,
+                ),
+            ],
+            timestamp_index: Some(1),
+            version: 0,
+        };
+
+        // construct RawTableMeta
+        let raw_table_meta = RawTableMeta {
+            schema: raw_schema,
+            primary_key_indices: vec![0],
+            value_indices: vec![2],
+            engine: METRIC_ENGINE_NAME.to_string(),
+            next_column_id: 0,
+            region_numbers: vec![0],
+            options: Default::default(),
+            created_on: Default::default(),
+            partition_key_indices: Default::default(),
+        };
+
+        // construct RawTableInfo
+        let raw_table_info = RawTableInfo {
+            ident: Default::default(),
+            meta: raw_table_meta,
+            name: Default::default(),
+            desc: Default::default(),
+            catalog_name: Default::default(),
+            schema_name: Default::default(),
+            table_type: TableType::Base,
+        };
+
+        // construct create table expr
+        let create_table_expr = CreateTableExpr {
+            column_defs: vec![
+                ColumnDef {
+                    name: "column3".to_string(),
+                    semantic_type: SemanticType::Tag as i32,
+                    ..Default::default()
+                },
+                ColumnDef {
+                    name: "column1".to_string(),
+                    semantic_type: SemanticType::Timestamp as i32,
+                    ..Default::default()
+                },
+                ColumnDef {
+                    name: "column2".to_string(),
+                    semantic_type: SemanticType::Field as i32,
+                    ..Default::default()
+                },
+            ],
+            primary_keys: vec!["column3".to_string()],
+            ..Default::default()
+        };
+
+        let mut create_table_task =
+            CreateTableTask::new(create_table_expr, Vec::new(), raw_table_info);
+
+        // Call the sort_columns method
+        create_table_task.sort_columns();
+
+        // Assert that the columns are sorted correctly
+        assert_eq!(
+            create_table_task.create_table.column_defs[0].name,
+            "column1".to_string()
+        );
+        assert_eq!(
+            create_table_task.create_table.column_defs[1].name,
+            "column2".to_string()
+        );
+        assert_eq!(
+            create_table_task.create_table.column_defs[2].name,
+            "column3".to_string()
+        );
+
+        // Assert that the table_info is updated correctly
+        assert_eq!(
+            create_table_task.table_info.meta.schema.timestamp_index,
+            Some(0)
+        );
+        assert_eq!(
+            create_table_task.table_info.meta.primary_key_indices,
+            vec![2]
+        );
+        assert_eq!(create_table_task.table_info.meta.value_indices, vec![1]);
+    }
}
@@ -19,7 +19,9 @@ pub use common_base::AffectedRows;
use common_recordbatch::SendableRecordBatchStream;

use crate::cache_invalidator::DummyCacheInvalidator;
-use crate::datanode_manager::{Datanode, DatanodeManager, DatanodeManagerRef, DatanodeRef};
+use crate::datanode_manager::{
+    Datanode, DatanodeManager, DatanodeManagerRef, DatanodeRef, HandleResponse,
+};
use crate::ddl::table_meta::TableMetadataAllocator;
use crate::ddl::DdlContext;
use crate::error::Result;
@@ -32,7 +34,7 @@ use crate::wal_options_allocator::WalOptionsAllocator;

#[async_trait::async_trait]
pub trait MockDatanodeHandler: Sync + Send + Clone {
-    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<AffectedRows>;
+    async fn handle(&self, peer: &Peer, request: RegionRequest) -> Result<HandleResponse>;

    async fn handle_query(
        &self,
@@ -62,7 +64,7 @@ struct MockDatanode<T> {

#[async_trait::async_trait]
impl<T: MockDatanodeHandler> Datanode for MockDatanode<T> {
-    async fn handle(&self, request: RegionRequest) -> Result<AffectedRows> {
+    async fn handle(&self, request: RegionRequest) -> Result<HandleResponse> {
        self.handler.handle(&self.peer, request).await
    }

@@ -16,4 +16,5 @@
pub const GREPTIME_EXEC_PREFIX: &str = "greptime_exec_";

/// Execution cost metrics key
-pub const GREPTIME_EXEC_COST: &str = "greptime_exec_cost";
+pub const GREPTIME_EXEC_READ_COST: &str = "greptime_exec_read_cost";
+pub const GREPTIME_EXEC_WRITE_COST: &str = "greptime_exec_write_cost";

@@ -17,4 +17,4 @@
/// since `plugins` crate is at the top depending on crates like `frontend` and `datanode`
mod consts;

-pub use consts::{GREPTIME_EXEC_COST, GREPTIME_EXEC_PREFIX};
+pub use consts::{GREPTIME_EXEC_PREFIX, GREPTIME_EXEC_READ_COST, GREPTIME_EXEC_WRITE_COST};
@@ -40,7 +40,7 @@ pub struct Output {
/// Original Output struct
/// carrying result data to response/client/user interface
pub enum OutputData {
-    AffectedRows(usize),
+    AffectedRows(OutputRows),
    RecordBatches(RecordBatches),
    Stream(SendableRecordBatchStream),
}
@@ -50,11 +50,11 @@ pub enum OutputData {
pub struct OutputMeta {
    /// May exist for query output. One can retrieve execution metrics from this plan.
    pub plan: Option<Arc<dyn PhysicalPlan>>,
-    pub cost: usize,
+    pub cost: OutputCost,
}

impl Output {
-    pub fn new_with_affected_rows(affected_rows: usize) -> Self {
+    pub fn new_with_affected_rows(affected_rows: OutputRows) -> Self {
        Self {
            data: OutputData::AffectedRows(affected_rows),
            meta: Default::default(),
@@ -78,6 +78,13 @@ impl Output {
    pub fn new(data: OutputData, meta: OutputMeta) -> Self {
        Self { data, meta }
    }
+
+    pub fn extract_rows_and_cost(&self) -> (OutputRows, OutputCost) {
+        match self.data {
+            OutputData::AffectedRows(rows) => (rows, self.meta.cost),
+            _ => (0, self.meta.cost),
+        }
+    }
}

impl Debug for OutputData {
@@ -133,3 +140,6 @@ impl From<&AddColumnLocation> for Location {
        }
    }
}
+
+pub type OutputRows = usize;
+pub type OutputCost = usize;
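A small usage sketch for the new helper (values are made up; per the constructor above, the default `OutputMeta` carries zero cost):

let output = Output::new_with_affected_rows(42);
let (rows, cost) = output.extract_rows_and_cost();
assert_eq!(rows, 42);
assert_eq!(cost, 0);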
@@ -81,9 +81,7 @@ impl RegionHeartbeatResponseHandler {
            Instruction::UpgradeRegion(upgrade_region) => Ok(Box::new(move |handler_context| {
                handler_context.handle_upgrade_region_instruction(upgrade_region)
            })),
-            Instruction::InvalidateTableIdCache(_) | Instruction::InvalidateTableNameCache(_) => {
-                InvalidHeartbeatResponseSnafu.fail()
-            }
+            Instruction::InvalidateCaches(_) => InvalidHeartbeatResponseSnafu.fail(),
        }
    }
}
@@ -25,6 +25,7 @@ use async_trait::async_trait;
use bytes::Bytes;
use common_error::ext::BoxedError;
use common_error::status_code::StatusCode;
+use common_meta::datanode_manager::HandleResponse;
use common_query::logical_plan::Expr;
use common_query::physical_plan::DfPhysicalPlanAdapter;
use common_query::{DfPhysicalPlan, OutputData};
@@ -128,7 +129,7 @@ impl RegionServer {
        &self,
        region_id: RegionId,
        request: RegionRequest,
-    ) -> Result<AffectedRows> {
+    ) -> Result<HandleResponse> {
        self.inner.handle_request(region_id, request).await
    }

@@ -267,11 +268,10 @@ impl RegionServerHandler for RegionServer {
            results
        };

-        // merge results by simply summing up affected rows.
-        // only insert/delete will have multiple results.
+        // merge results by summing up affected rows and merging extensions.
        let mut affected_rows = 0;
        for result in results {
-            affected_rows += result;
+            affected_rows += result.affected_rows;
        }

        Ok(RegionResponse {
@@ -282,6 +282,7 @@ impl RegionServerHandler for RegionServer {
                }),
            }),
            affected_rows: affected_rows as _,
+            extension: Default::default(),
        })
    }
}
@@ -462,7 +463,7 @@ impl RegionServerInner {
        &self,
        region_id: RegionId,
        request: RegionRequest,
-    ) -> Result<AffectedRows> {
+    ) -> Result<HandleResponse> {
        let request_type = request.request_type();
        let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
            .with_label_values(&[request_type])
@@ -487,7 +488,7 @@ impl RegionServerInner {

        let engine = match self.get_engine(region_id, &region_change)? {
            CurrentEngine::Engine(engine) => engine,
-            CurrentEngine::EarlyReturn(rows) => return Ok(rows),
+            CurrentEngine::EarlyReturn(rows) => return Ok(HandleResponse::new(rows)),
        };

        // Sets corresponding region status to registering/deregistering before the operation.
@@ -502,7 +503,10 @@ impl RegionServerInner {
                // Sets corresponding region status to ready.
                self.set_region_status_ready(region_id, engine, region_change)
                    .await?;
-                Ok(result)
+                Ok(HandleResponse {
+                    affected_rows: result.affected_rows,
+                    extension: result.extension,
+                })
            }
            Err(err) => {
                // Removes the region status if the operation fails.
@@ -645,6 +649,7 @@ impl RegionServerInner {
            .decode(Bytes::from(plan), catalog_list, "", "")
            .await
            .context(DecodeLogicalPlanSnafu)?;
+
        let result = self
            .query_engine
            .execute(logical_plan.into(), ctx)
@@ -916,11 +921,11 @@ mod tests {
            RegionEngineWithStatus::Registering(engine.clone()),
        );

-        let affected_rows = mock_region_server
+        let response = mock_region_server
            .handle_request(region_id, RegionRequest::Create(create_req))
            .await
            .unwrap();
-        assert_eq!(affected_rows, 0);
+        assert_eq!(response.affected_rows, 0);

        let status = mock_region_server
            .inner
@@ -931,7 +936,7 @@ mod tests {

        assert!(matches!(status, RegionEngineWithStatus::Registering(_)));

-        let affected_rows = mock_region_server
+        let response = mock_region_server
            .handle_request(
                region_id,
                RegionRequest::Open(RegionOpenRequest {
@@ -943,7 +948,7 @@ mod tests {
            )
            .await
            .unwrap();
-        assert_eq!(affected_rows, 0);
+        assert_eq!(response.affected_rows, 0);

        let status = mock_region_server
            .inner
@@ -971,11 +976,11 @@ mod tests {
            RegionEngineWithStatus::Deregistering(engine.clone()),
        );

-        let affected_rows = mock_region_server
+        let response = mock_region_server
            .handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
            .await
            .unwrap();
-        assert_eq!(affected_rows, 0);
+        assert_eq!(response.affected_rows, 0);

        let status = mock_region_server
            .inner
@@ -990,11 +995,11 @@ mod tests {
            RegionEngineWithStatus::Deregistering(engine.clone()),
        );

-        let affected_rows = mock_region_server
+        let response = mock_region_server
            .handle_request(region_id, RegionRequest::Close(RegionCloseRequest {}))
            .await
            .unwrap();
-        assert_eq!(affected_rows, 0);
+        assert_eq!(response.affected_rows, 0);

        let status = mock_region_server
            .inner
|
||||
use query::{QueryEngine, QueryEngineContext};
|
||||
use session::context::QueryContextRef;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_engine::{RegionEngine, RegionRole, SetReadonlyResponse};
|
||||
use store_api::region_engine::{RegionEngine, RegionHandleResult, RegionRole, SetReadonlyResponse};
|
||||
use store_api::region_request::{AffectedRows, RegionRequest};
|
||||
use store_api::storage::{RegionId, ScanRequest};
|
||||
use table::TableRef;
|
||||
@@ -166,16 +166,18 @@ impl RegionEngine for MockRegionEngine {
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
request: RegionRequest,
|
||||
) -> Result<AffectedRows, BoxedError> {
|
||||
) -> Result<RegionHandleResult, BoxedError> {
|
||||
if let Some(delay) = self.handle_request_delay {
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
if let Some(mock_fn) = &self.handle_request_mock_fn {
|
||||
return mock_fn(region_id, request).map_err(BoxedError::new);
|
||||
return mock_fn(region_id, request)
|
||||
.map_err(BoxedError::new)
|
||||
.map(RegionHandleResult::new);
|
||||
};
|
||||
|
||||
let _ = self.sender.send((region_id, request)).await;
|
||||
Ok(0)
|
||||
Ok(RegionHandleResult::new(0))
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
|
||||
@@ -143,11 +143,22 @@ impl ColumnSchema {
    }

+    /// Sets the nullability of the column to `true`.
+    /// Similar to [set_nullable] but takes ownership and returns an owned value.
+    ///
+    /// [set_nullable]: Self::set_nullable
+    pub fn with_nullable_set(mut self) -> Self {
+        self.is_nullable = true;
+        self
+    }
+
+    /// Sets the nullability of the column to `true`.
+    /// Similar to [with_nullable_set] but doesn't take ownership.
+    ///
+    /// [with_nullable_set]: Self::with_nullable_set
+    pub fn set_nullable(&mut self) {
+        self.is_nullable = true;
+    }
+
    /// Creates a new [`ColumnSchema`] with given metadata.
    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
        self.metadata = metadata;
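Usage sketch for the two new setters (column names and types are made up):

// Owned, chainable form:
let col = ColumnSchema::new("host", ConcreteDataType::string_datatype(), false)
    .with_nullable_set(); // is_nullable is now true

// In-place form on an existing value:
let mut col2 = ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), false);
col2.set_nullable();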
@@ -24,7 +24,7 @@ use common_telemetry::{error, info};
use object_store::ObjectStore;
use snafu::{ensure, OptionExt};
use store_api::metadata::RegionMetadataRef;
-use store_api::region_engine::{RegionEngine, RegionRole, SetReadonlyResponse};
+use store_api::region_engine::{RegionEngine, RegionHandleResult, RegionRole, SetReadonlyResponse};
use store_api::region_request::{
    AffectedRows, RegionCloseRequest, RegionCreateRequest, RegionDropRequest, RegionOpenRequest,
    RegionRequest,
@@ -60,7 +60,7 @@ impl RegionEngine for FileRegionEngine {
        &self,
        region_id: RegionId,
        request: RegionRequest,
-    ) -> Result<AffectedRows, BoxedError> {
+    ) -> Result<RegionHandleResult, BoxedError> {
        self.inner
            .handle_request(region_id, request)
            .await
@@ -154,8 +154,8 @@ impl EngineInner {
        &self,
        region_id: RegionId,
        request: RegionRequest,
-    ) -> EngineResult<AffectedRows> {
-        match request {
+    ) -> EngineResult<RegionHandleResult> {
+        let result = match request {
            RegionRequest::Create(req) => self.handle_create(region_id, req).await,
            RegionRequest::Drop(req) => self.handle_drop(region_id, req).await,
            RegionRequest::Open(req) => self.handle_open(region_id, req).await,
@@ -164,7 +164,8 @@ impl EngineInner {
                operation: request.to_string(),
            }
            .fail(),
-        }
+        };
+        result.map(RegionHandleResult::new)
    }

    async fn stop(&self) -> EngineResult<()> {
@@ -25,6 +25,7 @@ num-traits = "0.2"
serde.workspace = true
servers.workspace = true
session.workspace = true
+smallvec.workspace = true
snafu.workspace = true
tokio.workspace = true
tonic.workspace = true

@@ -19,3 +19,4 @@ mod adapter;
mod expr;
mod plan;
mod repr;
+mod utils;
@@ -31,7 +31,7 @@ pub(crate) use relation::{RelationDesc, RelationType};
use serde::{Deserialize, Serialize};
use snafu::ResultExt;

-use crate::expr::error::{CastValueSnafu, EvalError};
+use crate::expr::error::{CastValueSnafu, EvalError, InvalidArgumentSnafu};

/// System-wide Record count difference type. Useful for capturing data changes
///
@@ -39,17 +39,32 @@ use crate::expr::error::{CastValueSnafu, EvalError};
/// and +/-n means insert/remove multiple duplicate records.
pub type Diff = i64;

-/// System-wide default timestamp type
+/// System-wide default timestamp type, in milliseconds
pub type Timestamp = i64;

+/// System-wide default duration type, in milliseconds
+pub type Duration = i64;
+
/// Default type for a repr of changes to a collection.
pub type DiffRow = (Row, Timestamp, Diff);

+pub type KeyValDiffRow = ((Row, Row), Timestamp, Diff);
+
/// Converts a value that is, or can be converted to, a DateTime into an internal timestamp
pub fn value_to_internal_ts(value: Value) -> Result<Timestamp, EvalError> {
+    let is_supported_time_type = |arg: &Value| {
+        let ty = arg.data_type();
+        matches!(
+            ty,
+            ConcreteDataType::Date(..)
+                | ConcreteDataType::DateTime(..)
+                | ConcreteDataType::Timestamp(..)
+        )
+    };
    match value {
        Value::DateTime(ts) => Ok(ts.val()),
-        arg => {
+        Value::Int64(ts) => Ok(ts),
+        arg if is_supported_time_type(&arg) => {
            let arg_ty = arg.data_type();
            let res = cast(arg, &ConcreteDataType::datetime_datatype()).context({
                CastValueSnafu {
@@ -63,6 +78,10 @@ pub fn value_to_internal_ts(value: Value) -> Result<Timestamp, EvalError> {
                unreachable!()
            }
        }
+        _ => InvalidArgumentSnafu {
+            reason: format!("Expect a time type or i64, got {:?}", value.data_type()),
+        }
+        .fail(),
    }
}
@@ -145,24 +164,58 @@ impl From<Row> for ProtoRow {
        ProtoRow { values }
    }
}
#[cfg(test)]
mod test {
+    use common_time::{Date, DateTime};

-    #[test]
-    fn test_row() {
-        let row = Row::empty();
-        let row_1 = Row::new(vec![]);
-        assert_eq!(row, row_1);
-        let mut row_2 = Row::new(vec![Value::Int32(1), Value::Int32(2)]);
-        assert_eq!(row_2.get(0), Some(&Value::Int32(1)));
-        row_2.clear();
-        assert_eq!(row_2.get(0), None);
-        row_2
-            .packer()
-            .extend(vec![Value::Int32(1), Value::Int32(2)]);
-        assert_eq!(row_2.get(0), Some(&Value::Int32(1)));
-        row_2.extend(vec![Value::Int32(1), Value::Int32(2)]);
-        assert_eq!(row_2.len(), 4);
-        let row_3 = Row::pack(row_2.into_iter());
-        assert_eq!(row_3.len(), 4);
-        let row_4 = Row::pack(row_3.iter().cloned());
-        assert_eq!(row_3, row_4);
+    use super::*;

+    #[test]
+    fn test_row() {
+        let row = Row::empty();
+        let row_1 = Row::new(vec![]);
+        assert_eq!(row, row_1);
+        let mut row_2 = Row::new(vec![Value::Int32(1), Value::Int32(2)]);
+        assert_eq!(row_2.get(0), Some(&Value::Int32(1)));
+        row_2.clear();
+        assert_eq!(row_2.get(0), None);
+        row_2
+            .packer()
+            .extend(vec![Value::Int32(1), Value::Int32(2)]);
+        assert_eq!(row_2.get(0), Some(&Value::Int32(1)));
+        row_2.extend(vec![Value::Int32(1), Value::Int32(2)]);
+        assert_eq!(row_2.len(), 4);
+        let row_3 = Row::pack(row_2.into_iter());
+        assert_eq!(row_3.len(), 4);
+        let row_4 = Row::pack(row_3.iter().cloned());
+        assert_eq!(row_3, row_4);
+    }

+    #[test]
+    fn test_cast_to_internal_ts() {
+        {
+            let a = Value::from(1i32);
+            let b = Value::from(1i64);
+            let c = Value::DateTime(DateTime::new(1i64));
+            let d = Value::from(1.0);
+
+            assert!(value_to_internal_ts(a).is_err());
+            assert_eq!(value_to_internal_ts(b).unwrap(), 1i64);
+            assert_eq!(value_to_internal_ts(c).unwrap(), 1i64);
+            assert!(value_to_internal_ts(d).is_err());
+        }
+
+        {
+            // time related type
+            let a = Value::Date(Date::new(1));
+            assert_eq!(value_to_internal_ts(a).unwrap(), 86400 * 1000i64);
+            let b = Value::Timestamp(common_time::Timestamp::new_second(1));
+            assert_eq!(value_to_internal_ts(b).unwrap(), 1000i64);
+            let c = Value::Time(common_time::time::Time::new_second(1));
+            assert!(matches!(
+                value_to_internal_ts(c),
+                Err(EvalError::InvalidArgument { .. })
+            ));
+        }
+    }
}
784
src/flow/src/utils.rs
Normal file
@@ -0,0 +1,784 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeMap, BTreeSet};
use std::ops::Bound;
use std::sync::Arc;

use itertools::Itertools;
use serde::{Deserialize, Serialize};
use smallvec::{smallvec, SmallVec};
use tokio::sync::{Mutex, RwLock};

use crate::expr::error::InternalSnafu;
use crate::expr::{EvalError, ScalarExpr};
use crate::repr::{value_to_internal_ts, Diff, DiffRow, Duration, KeyValDiffRow, Row, Timestamp};

pub type Batch = BTreeMap<Row, SmallVec<[DiffRow; 2]>>;
pub type Spine = BTreeMap<Timestamp, Batch>;

/// Determines when a key should expire according to the event timestamp in the key;
/// if a key is expired, any future updates to it should be ignored.
/// Note that a key expires by its event timestamp (contained in the key), not by the time it was inserted (system timestamp).
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct KeyExpiryManager {
    /// A map from event timestamp to keys, used for expiring keys.
    event_ts_to_key: BTreeMap<Timestamp, BTreeSet<Row>>,
    /// Duration after which a key is considered expired and will be removed from the state.
    key_expiration_duration: Option<Duration>,
    /// Used to extract the event timestamp from a key row.
    event_timestamp_from_row: Option<ScalarExpr>,
}

impl KeyExpiryManager {
    /// Extracts the event timestamp from a key row.
    ///
    /// Returns `None` if no expire state is set.
    pub fn extract_event_ts(&self, row: &Row) -> Result<Option<Timestamp>, EvalError> {
        let ts = self
            .event_timestamp_from_row
            .as_ref()
            .map(|e| e.eval(&row.inner))
            .transpose()?
            .map(value_to_internal_ts)
            .transpose()?;
        Ok(ts)
    }

    /// Returns the timestamp before which keys are expired at time `now`, computed as `now - expiration_duration`.
    pub fn compute_expiration_timestamp(&self, now: Timestamp) -> Option<Timestamp> {
        self.key_expiration_duration.map(|d| now - d)
    }

    /// Updates the event-timestamp-to-key mapping.
    ///
    /// If the given key is already expired by `now` (i.e. its event timestamp is less than `now - expiration_duration`),
    /// returns how long ago it expired; otherwise returns `None`.
    pub fn update_event_ts(
        &mut self,
        now: Timestamp,
        row: &Row,
    ) -> Result<Option<Duration>, EvalError> {
        let ts = if let Some(event_ts) = self.extract_event_ts(row)? {
            let ret = self.compute_expiration_timestamp(now).and_then(|e| {
                if e > event_ts {
                    // return by how much time it has expired
                    Some(e - event_ts)
                } else {
                    None
                }
            });
            if let Some(expire_by) = ret {
                return Ok(Some(expire_by));
            }
            event_ts
        } else {
            return Ok(None);
        };

        self.event_ts_to_key
            .entry(ts)
            .or_default()
            .insert(row.clone());
        Ok(None)
    }
}
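// Worked example of the expiry arithmetic above (illustrative numbers only):
// with key_expiration_duration = Some(600_000) and now = 1_000_000,
// compute_expiration_timestamp(now) = Some(400_000).
// A key whose event timestamp is 350_000 is already 50_000 ms past expiry,
// so update_event_ts returns Ok(Some(50_000)) and the caller skips the update;
// a key at 450_000 is still live and gets indexed under its event timestamp.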
|
||||
/// A shared state of key-value pair for various state
|
||||
/// in dataflow execution
|
||||
///
|
||||
/// i.e: Mfp operator with temporal filter need to store it's future output so that it can add now, and delete later.
|
||||
/// To get all needed updates in a time span, use [`get_updates_in_range`]
|
||||
///
|
||||
/// And reduce operator need full state of it's output, so that it can query(and modify by calling [`apply_updates`])
|
||||
/// existing state, also need a way to expire keys. To get a key's current value, use [`get`] with time being `now`
|
||||
/// so it's like:
|
||||
/// `mfp operator -> arrange(store futures only, no expire) -> reduce operator <-> arrange(full, with key expiring time) -> output`
|
||||
///
|
||||
/// Note the two way arrow between reduce operator and arrange, it's because reduce operator need to query existing state
|
||||
/// and also need to update existing state
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
|
||||
pub struct Arrangement {
|
||||
/// all the updates that pending to be applied
|
||||
/// arranged in time -> (key -> (new_val, diff))
|
||||
/// all updates where the update time is greater than the last key but less than or equal to the current key
|
||||
/// are updates are categorized under current key.
|
||||
///
|
||||
/// that is: `last key < update time <= current key`
|
||||
/// or for time that's before the first key, just being categorized under the first key
|
||||
/// The first key is always `now` which include consolidated updates from past, representing the current state of arrangement
|
||||
///
|
||||
/// Note that for a given time and key, there might be a bunch of updates and they should be applied in order
|
||||
/// And for consolidated batch(i.e. btach representing now), there should be only one update for each key with `diff==1`
|
||||
///
|
||||
/// And since most time a key gots updated by first delete then insert, small vec with size of 2 make sense
|
||||
/// TODO: batch size balancing?
|
||||
spine: Spine,
|
||||
/// if set to false, will not update current value of the arrangement, useful for case like `map -> arrange -> reduce`
|
||||
full_arrangement: bool,
|
||||
/// flag to mark that this arrangement haven't been written to, so that it can be cloned and shared
|
||||
is_written: bool,
|
||||
/// manage the expire state of the arrangement
|
||||
expire_state: Option<KeyExpiryManager>,
|
||||
/// the time that the last compaction happened, also know as current time
|
||||
last_compaction_time: Option<Timestamp>,
|
||||
}
|
||||
|
||||
impl Arrangement {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
spine: Default::default(),
|
||||
full_arrangement: false,
|
||||
is_written: false,
|
||||
expire_state: None,
|
||||
last_compaction_time: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// apply updates into spine, all updates should have timestamps that are larger than spine's first key
|
||||
///
|
||||
/// return the maximum expire time(already expire by how much time) of all updates if any keys is already expired
|
||||
pub fn apply_updates(
|
||||
&mut self,
|
||||
now: Timestamp,
|
||||
updates: Vec<KeyValDiffRow>,
|
||||
) -> Result<Option<Duration>, EvalError> {
|
||||
let mut max_late_by: Option<Duration> = None;
|
||||
if !self.is_written {
|
||||
self.is_written = true;
|
||||
}
|
||||
for ((key, val), ts, diff) in updates {
|
||||
// keep rows with expired event timestamp from being updated
|
||||
if let Some(s) = &mut self.expire_state {
|
||||
if let Some(late_by) = s.update_event_ts(now, &key)? {
|
||||
max_late_by = Some(max_late_by.map_or(late_by, |v| v.max(late_by)));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// the first batch with key that's greater or equal to ts
|
||||
let batch = if let Some((_, batch)) = self.spine.range_mut(ts..).next() {
|
||||
batch
|
||||
} else {
|
||||
// if no batch with `batch key >= ts`, then create a new batch with key being `ts`
|
||||
self.spine.entry(ts).or_default()
|
||||
};
|
||||
|
||||
{
|
||||
let key_updates = batch.entry(key).or_insert(smallvec![]);
|
||||
key_updates.push((val, ts, diff));
|
||||
// a stable sort make updates sort in order of insertion
|
||||
// without changing the order of updates within same tick
|
||||
key_updates.sort_by_key(|r| r.1);
|
||||
}
|
||||
}
|
||||
Ok(max_late_by)
|
||||
}
|
||||
|
||||
/// find out the time of next update in the future
|
||||
/// that is the next update with `timestamp > now`
|
||||
pub fn get_next_update_time(&self, now: &Timestamp) -> Option<Timestamp> {
|
||||
// iter over batches that only have updates of `timestamp>now` and find the first non empty batch, then get the minimum timestamp in that batch
|
||||
let next_batches = self.spine.range((Bound::Excluded(now), Bound::Unbounded));
|
||||
for (_ts, batch) in next_batches {
|
||||
let min_ts = batch
|
||||
.iter()
|
||||
.flat_map(|(_k, v)| v.iter().map(|(_, ts, _)| *ts))
|
||||
.min();
|
||||
if let Some(min_ts) = min_ts {
|
||||
return Some(min_ts);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// all batches are empty, return now
|
||||
None
|
||||
}
|
||||
|
||||
/// get the last compaction time
|
||||
pub fn get_compaction(&self) -> Option<Timestamp> {
|
||||
self.last_compaction_time
|
||||
}
|
||||
|
||||

    /// Split the spine off at `now`, and return the part that is before `now` (including `now`).
    fn split_lte(&mut self, now: &Timestamp) -> Spine {
        let mut before = self.spine.split_off(&(now + 1));
        std::mem::swap(&mut before, &mut self.spine);

        // if `before`'s last key == now, then all the keys we need are found
        if before
            .last_key_value()
            .map(|(k, _v)| *k == *now)
            .unwrap_or(false)
        {
            return before;
        }

        // also need to move all keys with timestamp <= now from the first batch in the spine to `before`;
        // we know all remaining keys to split off satisfy `last key < key <= now`, so we make them into a new batch
        if let Some(mut first_batch) = self.spine.first_entry() {
            let mut new_batch: Batch = Default::default();
            // remove all keys whose value is an empty vec
            first_batch.get_mut().retain(|key, updates| {
                // move updates with `ts <= now` out of `updates`
                updates.retain(|(val, ts, diff)| {
                    if *ts <= *now {
                        new_batch.entry(key.clone()).or_insert(smallvec![]).push((
                            val.clone(),
                            *ts,
                            *diff,
                        ));
                    }
                    *ts > *now
                });
                !updates.is_empty()
            });

            before.entry(*now).or_default().extend(new_batch);
        }
        before
    }

    /// Advance time to `now` and consolidate all older updates (`now` included) into the first key.
    ///
    /// Returns how late the most expired update is (i.e. by how much time it has already expired), if any key is already expired.
    pub fn set_compaction(&mut self, now: Timestamp) -> Result<Option<Duration>, EvalError> {
        let mut max_late_by: Option<Duration> = None;

        let should_compact = self.split_lte(&now);

        self.last_compaction_time = Some(now);
        // if a full arrangement is not needed, we can just discard everything before and including `now`
        if !self.full_arrangement {
            return Ok(None);
        }
        // otherwise consolidate them into the current key-value pairs
        let mut compacted_batch: BTreeMap<Row, SmallVec<[DiffRow; 2]>> = Default::default();

        for (_, batch) in should_compact {
            for (key, updates) in batch {
                if let Some(s) = &mut self.expire_state {
                    if let Some(late_by) = s.update_event_ts(now, &key)? {
                        max_late_by = Some(max_late_by.map_or(late_by, |v| v.max(late_by)));
                        continue;
                    }
                }
                // if the diffs cancel each other out, remove the key
                let mut old_row: Option<DiffRow> =
                    compacted_batch.get(&key).and_then(|v| v.first()).cloned();

                for new_row in updates {
                    old_row = compact_diff_row(old_row, &new_row);
                }
                if let Some(compacted_update) = old_row {
                    compacted_batch.insert(key, smallvec![compacted_update]);
                } else {
                    compacted_batch.remove(&key);
                }
            }
        }

        // insert the compacted batch into the spine with key `now`
        self.spine.insert(now, compacted_batch);
        Ok(max_late_by)
    }
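
    // Illustrative sketch of the compaction effect (the values mirror
    // `only_save_future_updates` below): with a full arrangement, compacting at
    // time `t` folds all batches keyed <= `t` into one consolidated batch at `t`:
    //
    //     assert_eq!(arr.spine.len(), 3);
    //     arr.set_compaction(2)?;          // batches at 1 and 2 merge into one at 2
    //     assert_eq!(arr.spine.len(), 2);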

    /// Get the updates of the arrangement within the given time range.
    pub fn get_updates_in_range<R: std::ops::RangeBounds<Timestamp> + Clone>(
        &self,
        range: R,
    ) -> Vec<KeyValDiffRow> {
        let mut result = vec![];
        // three parts:
        // 1. the starting batch with first key >= range.start, which may contain updates that are not in range
        // 2. the batches with keys in range
        // 3. the last batch with first key > range.end, which may contain updates that are in range
        let mut is_first = true;
        for (_ts, batch) in self.spine.range(range.clone()) {
            if is_first {
                for (key, updates) in batch {
                    let iter = updates
                        .iter()
                        .filter(|(_val, ts, _diff)| range.contains(ts))
                        .map(|(val, ts, diff)| ((key.clone(), val.clone()), *ts, *diff));
                    result.extend(iter);
                }
                is_first = false;
            } else {
                for (key, updates) in batch.clone() {
                    result.extend(
                        updates
                            .iter()
                            .map(|(val, ts, diff)| ((key.clone(), val.clone()), *ts, *diff)),
                    );
                }
            }
        }

        // deal with the boundaries, both start and end:
        // for the next batch with upper bound >= range.end
        // we need to search for updates within range
        let neg_bound = match range.end_bound() {
            Bound::Included(b) => {
                // if the boundary is aligned, the last batch in range already covers the full range,
                // so there are no further keys we need in the next batch
                if self.spine.contains_key(b) {
                    return result;
                }
                Bound::Excluded(*b)
            }
            Bound::Excluded(b) => Bound::Included(*b),
            Bound::Unbounded => return result,
        };
        let search_range = (neg_bound, Bound::Unbounded);
        if let Some(last_batch) = self.spine.range(search_range).next() {
            for (key, updates) in last_batch.1 {
                let iter = updates
                    .iter()
                    .filter(|(_val, ts, _diff)| range.contains(ts))
                    .map(|(val, ts, diff)| ((key.clone(), val.clone()), *ts, *diff));
                result.extend(iter);
            }
        };
        result
    }
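
    // Illustrative range query (mirrors `test_get_by_range` below): with batches
    // {2: [1, 2], 4: [3, 4], 6: [5, 6]}, a query for 2..=5 must also pick the
    // matching updates out of the two boundary batches:
    //
    //     let hits = arr.get_updates_in_range(2..=5);   // timestamps 2, 3, 4, 5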

    /// Expire keys that are older than `expire_time`, intended to reduce memory usage and limit late-arriving data.
    pub fn trunc_expired(&mut self, now: Timestamp) {
        if let Some(s) = &mut self.expire_state {
            let expire_time = if let Some(t) = s.compute_expiration_timestamp(now) {
                t
            } else {
                // never expire
                return;
            };
            // find all keys smaller than or equal to `expire_time` and silently remove them
            let mut after = s.event_ts_to_key.split_off(&(expire_time + 1));
            std::mem::swap(&mut s.event_ts_to_key, &mut after);
            let before = after;
            for key in before.into_iter().flat_map(|i| i.1.into_iter()) {
                for (_ts, batch) in self.spine.iter_mut() {
                    batch.remove(&key);
                }
            }
        }
    }

    /// Get the current state of a key,
    /// useful for querying existing keys (e.g. the reduce and join operators need to query existing state).
    pub fn get(&self, now: Timestamp, key: &Row) -> Option<(Row, Timestamp, Diff)> {
        if self.full_arrangement
            && self
                .spine
                .first_key_value()
                .map(|(ts, _)| *ts >= now)
                .unwrap_or(false)
        {
            self.spine
                .first_key_value()
                .and_then(|(_ts, batch)| batch.get(key).and_then(|v| v.first()).cloned())
        } else {
            // check keys <= now to determine the current value
            let mut final_val = None;

            let with_extra_batch = {
                let unaligned = self.spine.range(..=now);
                if unaligned
                    .clone()
                    .last()
                    .map(|(ts, _)| *ts == now)
                    .unwrap_or(false)
                {
                    // this extra chain is only there to make the types match
                    unaligned.chain(None)
                } else {
                    // if the last key is not equal to `now`, we need to include the next batch,
                    // because we know `last batch key < now < next batch key`,
                    // so the next batch may contain updates we want
                    unaligned.chain(
                        self.spine
                            .range((Bound::Excluded(now), Bound::Unbounded))
                            .next(),
                    )
                }
            };
            for (ts, batch) in with_extra_batch {
                if let Some(new_rows) = batch.get(key).map(|v| v.iter()) {
                    if *ts <= now {
                        for new_row in new_rows {
                            final_val = compact_diff_row(final_val, new_row);
                        }
                    } else {
                        for new_row in new_rows.filter(|new_row| new_row.1 <= now) {
                            final_val = compact_diff_row(final_val, new_row);
                        }
                    }
                }
            }
            final_val
        }
    }
}

fn compact_diff_row(old_row: Option<DiffRow>, new_row: &DiffRow) -> Option<DiffRow> {
    let (val, ts, diff) = new_row;
    match (old_row, diff) {
        (Some((row, _old_ts, old_diff)), diff) if row == *val && old_diff + diff == 0 => {
            // the key is deleted now
            None
        }
        (Some((row, _old_ts, old_diff)), diff) if row == *val && old_diff + diff != 0 => {
            Some((row, *ts, old_diff + *diff))
        }
        // if the old value does not equal the new value, simply consider it overwritten;
        // each key can only have one value, so it makes sense to replace the old value with the new one
        _ => Some((val.clone(), *ts, *diff)),
    }
}
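
// A quick illustration of the consolidation semantics above (hypothetical
// values; a `DiffRow` is `(Row, Timestamp, Diff)`):
//
//     // an insert (+1) followed by a matching retract (-1) cancels out:
//     let inserted = compact_diff_row(None, &(row.clone(), 1, 1));
//     assert!(compact_diff_row(inserted, &(row.clone(), 2, -1)).is_none());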

/// A handle to the inner Arrangement; it can be cloned and shared, which is useful for querying the inner state.
#[derive(Debug)]
pub struct ArrangeHandler {
    inner: Arc<RwLock<Arrangement>>,
}
impl ArrangeHandler {
    pub fn from(arr: Arrangement) -> Self {
        Self {
            inner: Arc::new(RwLock::new(arr)),
        }
    }
    pub fn write(&self) -> tokio::sync::RwLockWriteGuard<'_, Arrangement> {
        self.inner.blocking_write()
    }
    pub fn read(&self) -> tokio::sync::RwLockReadGuard<'_, Arrangement> {
        self.inner.blocking_read()
    }

    /// Clone the handler, but only keep future updates.
    pub fn clone_future_only(&self) -> Option<Self> {
        if self.read().is_written {
            return None;
        }
        Some(Self {
            inner: self.inner.clone(),
        })
    }

    /// Clone the handler, keeping all updates.
    /// Cloning is disallowed after the arrangement has been written,
    /// because that would lose the data written before the clone.
    pub fn clone_full_arrange(&self) -> Option<Self> {
        if self.read().is_written {
            return None;
        }
        let mut arr = self.write();
        arr.full_arrangement = true;
        drop(arr);
        Some(Self {
            inner: self.inner.clone(),
        })
    }
}
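
// Sharing sketch (mirrors the tests below): both clone flavors must happen
// before the first write, otherwise they return `None`:
//
//     let arr = ArrangeHandler::from(Arrangement::new());
//     let full = arr.clone_full_arrange();     // Some(_): also enables full arrangement
//     arr.write().apply_updates(0, updates)?;  // first write
//     assert!(arr.clone_full_arrange().is_none());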

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_future_get() {
        // test that if only future updates are applied, `get(future_time)` still works correctly
        let arr = Arrangement::new();
        let arr = ArrangeHandler::from(arr);

        {
            let mut arr = arr.write();
            let key = Row::new(vec![1.into()]);
            let updates: Vec<KeyValDiffRow> = vec![
                ((key.clone(), Row::new(vec![2.into()])), 1, 1),
                ((key.clone(), Row::new(vec![3.into()])), 2, 1),
                ((key.clone(), Row::new(vec![4.into()])), 3, 1),
            ];
            // all updates above are future updates
            arr.apply_updates(0, updates).unwrap();

            assert_eq!(arr.get(1, &key), Some((Row::new(vec![2.into()]), 1, 1)));

            assert_eq!(arr.get(2, &key), Some((Row::new(vec![3.into()]), 2, 1)));

            assert_eq!(arr.get(3, &key), Some((Row::new(vec![4.into()]), 3, 1)));
        }
    }

    #[test]
    fn only_save_future_updates() {
        // the mfp operator's temporal filter needs to record future updates so that it can delete on time,
        // i.e. insert a record now, then delete this record 5 minutes later;
        // it only needs to keep future updates (provided downstream doesn't need a full arrangement)
        let arr = Arrangement::new();
        let arr = ArrangeHandler::from(arr);
        let arr1 = arr.clone_full_arrange();
        assert!(arr1.is_some());
        let arr2 = arr.clone_future_only();
        assert!(arr2.is_some());

        {
            let mut arr = arr.write();
            let updates: Vec<KeyValDiffRow> = vec![
                ((Row::new(vec![1.into()]), Row::new(vec![2.into()])), 1, 1),
                ((Row::new(vec![2.into()]), Row::new(vec![3.into()])), 2, 1),
                ((Row::new(vec![3.into()]), Row::new(vec![4.into()])), 3, 1),
            ];
            // all updates above are future updates
            arr.apply_updates(0, updates).unwrap();
            assert_eq!(
                arr.get_updates_in_range(1..=1),
                vec![((Row::new(vec![1.into()]), Row::new(vec![2.into()])), 1, 1)]
            );
            assert_eq!(arr.spine.len(), 3);
            arr.set_compaction(1).unwrap();
            assert_eq!(arr.spine.len(), 3);
        }

        let arr2 = arr.clone_full_arrange();
        assert!(arr2.is_none());
        {
            let mut arr = arr.write();
            assert_eq!(arr.spine.len(), 3);
            arr.set_compaction(2).unwrap();
            assert_eq!(arr.spine.len(), 2);
        }
    }

    #[test]
    fn test_reduce_expire_keys() {
        let mut arr = Arrangement::new();
        let expire_state = KeyExpiryManager {
            event_ts_to_key: Default::default(),
            key_expiration_duration: Some(10),
            event_timestamp_from_row: Some(ScalarExpr::Column(0)),
        };
        let expire_state = Some(expire_state);
        arr.expire_state = expire_state;
        arr.full_arrangement = true;

        let arr = ArrangeHandler::from(arr);
        let now = 0;
        let key = Row::new(vec![1i64.into()]);
        let updates: Vec<KeyValDiffRow> = vec![
            (
                (Row::new(vec![1i64.into()]), Row::new(vec![2.into()])),
                1,
                1,
            ),
            (
                (Row::new(vec![2i64.into()]), Row::new(vec![3.into()])),
                2,
                1,
            ),
            (
                (Row::new(vec![3i64.into()]), Row::new(vec![4.into()])),
                3,
                1,
            ),
        ];
        {
            let mut arr = arr.write();
            arr.apply_updates(now, updates.clone()).unwrap();
            // repeating the same updates means having multiple updates for the same key
            arr.apply_updates(now, updates).unwrap();
            assert_eq!(
                arr.get_updates_in_range(1..=1),
                vec![
                    ((key.clone(), Row::new(vec![2.into()])), 1, 1),
                    ((key.clone(), Row::new(vec![2.into()])), 1, 1)
                ]
            );
            assert_eq!(arr.spine.len(), 3);
            arr.set_compaction(1).unwrap();
            assert_eq!(arr.spine.len(), 3);
        }
        {
            let mut arr = arr.write();
            assert_eq!(arr.spine.len(), 3);
            assert_eq!(arr.get(10, &key), Some((Row::new(vec![2.into()]), 1, 2)));
            arr.trunc_expired(10);
            assert_eq!(arr.spine.len(), 3);
            arr.trunc_expired(11);
            assert_eq!(arr.get(11, &key), None);
            assert_eq!(arr.spine.len(), 3);
            assert_eq!(arr.expire_state.as_ref().unwrap().event_ts_to_key.len(), 2);
            arr.trunc_expired(12);
            assert_eq!(arr.spine.len(), 3);
            assert_eq!(arr.expire_state.as_ref().unwrap().event_ts_to_key.len(), 1);
        }
    }

    #[test]
    fn test_apply_expired_keys() {
        // apply updates that include an expired key
        let mut arr = Arrangement::new();
        let expire_state = KeyExpiryManager {
            event_ts_to_key: Default::default(),
            key_expiration_duration: Some(10),
            event_timestamp_from_row: Some(ScalarExpr::Column(0)),
        };
        let expire_state = Some(expire_state);
        arr.expire_state = expire_state;

        let arr = ArrangeHandler::from(arr);

        let updates: Vec<KeyValDiffRow> = vec![
            (
                (Row::new(vec![1i64.into()]), Row::new(vec![2.into()])),
                1,
                1,
            ),
            (
                (Row::new(vec![2i64.into()]), Row::new(vec![3.into()])),
                2,
                1,
            ),
            (
                (Row::new(vec![3i64.into()]), Row::new(vec![4.into()])),
                3,
                1,
            ),
            (
                (Row::new(vec![3i64.into()]), Row::new(vec![4.into()])),
                3,
                1,
            ),
            (
                (Row::new(vec![1i64.into()]), Row::new(vec![42.into()])),
                10,
                1,
            ),
        ];
        {
            let mut arr = arr.write();
            arr.apply_updates(11, updates).unwrap();

            assert_eq!(
                arr.get(11, &Row::new(vec![1i64.into()])),
                Some((Row::new(vec![42.into()]), 10, 1))
            );
            arr.trunc_expired(12);
            assert_eq!(arr.get(12, &Row::new(vec![1i64.into()])), None);
        }
    }

    /// Test that when `split_lte` is given a boundary not aligned with batch boundaries,
    /// it still correctly retrieves all updates in the range, including updates in the batches
    /// near the boundary of the input range.
    #[test]
    fn test_split_off() {
        let mut arr = Arrangement::new();
        // manually create batches ..=1 and 2..=3
        arr.spine.insert(1, Default::default());
        arr.spine.insert(3, Default::default());
        arr.apply_updates(
            2,
            vec![((Row::new(vec![1.into()]), Row::new(vec![2.into()])), 2, 1)],
        )
        .unwrap();
        // the update falls into the range of batch 2..=3
        let mut arr1 = arr.clone();
        {
            assert_eq!(arr.get_next_update_time(&1), Some(2));
            // the split is expected to take batch ..=1 and create a new batch 2..=2 (which contains the update)
            let split = &arr.split_lte(&2);
            assert_eq!(split.len(), 2);
            assert_eq!(split[&2].len(), 1);
            let _ = &arr.split_lte(&3);
            assert_eq!(arr.get_next_update_time(&1), None);
        }
        {
            // taking all updates with timestamp <= 1 yields no updates
            let split = &arr1.split_lte(&1);
            assert_eq!(split.len(), 1);
        }
    }

    /// Test that a query range not aligned with batch boundaries
    /// still yields the correct result.
    #[test]
    fn test_get_by_range() {
        let mut arr = Arrangement::new();

        // will form three batches: {2: [2, 1], 4: [4, 3], 6: [6, 5]}
        // TODO(discord9): manually set batch
        let updates: Vec<KeyValDiffRow> = vec![
            ((Row::new(vec![1i64.into()]), Row::empty()), 2, 1),
            ((Row::new(vec![1i64.into()]), Row::empty()), 1, 1),
            ((Row::new(vec![2i64.into()]), Row::empty()), 4, 1),
            ((Row::new(vec![3i64.into()]), Row::empty()), 3, 1),
            ((Row::new(vec![3i64.into()]), Row::empty()), 6, 1),
            ((Row::new(vec![1i64.into()]), Row::empty()), 5, 1),
        ];
        arr.apply_updates(0, updates).unwrap();
        assert_eq!(
            arr.get_updates_in_range(2..=5),
            vec![
                ((Row::new(vec![1i64.into()]), Row::empty()), 2, 1),
                ((Row::new(vec![2i64.into()]), Row::empty()), 4, 1),
                ((Row::new(vec![3i64.into()]), Row::empty()), 3, 1),
                ((Row::new(vec![1i64.into()]), Row::empty()), 5, 1),
            ]
        );
    }

    /// Test that `get` with a timestamp unaligned with batch boundaries
    /// yields the correct result.
    #[test]
    fn test_get_unaligned() {
        let mut arr = Arrangement::new();

        // will form three batches: {2: [2, 1], 4: [4, 3], 6: [6, 5]}
        // TODO(discord9): manually set batch
        let key = Row::new(vec![1i64.into()]);
        let updates: Vec<KeyValDiffRow> = vec![
            ((key.clone(), Row::new(vec![1i64.into()])), 2, 1),
            ((key.clone(), Row::new(vec![2i64.into()])), 1, 1),
            ((key.clone(), Row::new(vec![3i64.into()])), 4, 1),
            ((key.clone(), Row::new(vec![4i64.into()])), 3, 1),
            ((key.clone(), Row::new(vec![5i64.into()])), 6, 1),
            ((key.clone(), Row::new(vec![6i64.into()])), 5, 1),
        ];
        arr.apply_updates(0, updates).unwrap();
        // aligned with a batch boundary
        assert_eq!(arr.get(2, &key), Some((Row::new(vec![1i64.into()]), 2, 1)));
        // unaligned with a batch boundary
        assert_eq!(arr.get(3, &key), Some((Row::new(vec![4i64.into()]), 3, 1)));
    }

    /// Test that out-of-order updates are sorted correctly.
    #[test]
    fn test_out_of_order_apply_updates() {
        let mut arr = Arrangement::new();

        let key = Row::new(vec![1i64.into()]);
        let updates: Vec<KeyValDiffRow> = vec![
            ((key.clone(), Row::new(vec![5i64.into()])), 6, 1),
            ((key.clone(), Row::new(vec![2i64.into()])), 2, -1),
            ((key.clone(), Row::new(vec![1i64.into()])), 2, 1),
            ((key.clone(), Row::new(vec![2i64.into()])), 1, 1),
            ((key.clone(), Row::new(vec![3i64.into()])), 4, 1),
            ((key.clone(), Row::new(vec![4i64.into()])), 3, 1),
            ((key.clone(), Row::new(vec![6i64.into()])), 5, 1),
        ];
        arr.apply_updates(0, updates.clone()).unwrap();
        let sorted = updates.iter().sorted_by_key(|r| r.1).cloned().collect_vec();
        assert_eq!(arr.get_updates_in_range(1..7), sorted);
    }
}

@@ -20,7 +20,6 @@ use common_meta::heartbeat::handler::{
};
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_telemetry::error;
use futures::future::Either;

#[derive(Clone)]
pub struct InvalidateTableCacheHandler {
@@ -32,8 +31,7 @@ impl HeartbeatResponseHandler for InvalidateTableCacheHandler {
    fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
        matches!(
            ctx.incoming_message.as_ref(),
            Some((_, Instruction::InvalidateTableIdCache { .. }))
                | Some((_, Instruction::InvalidateTableNameCache { .. }))
            Some((_, Instruction::InvalidateCaches(_)))
        )
    }

@@ -42,22 +40,11 @@ impl HeartbeatResponseHandler for InvalidateTableCacheHandler {
        let cache_invalidator = self.cache_invalidator.clone();

        let (meta, invalidator) = match ctx.incoming_message.take() {
            Some((meta, Instruction::InvalidateTableIdCache(table_id))) => (
                meta,
                Either::Left(async move {
                    cache_invalidator
                        .invalidate_table_id(&Context::default(), table_id)
                        .await
                }),
            ),
            Some((meta, Instruction::InvalidateTableNameCache(table_name))) => (
                meta,
                Either::Right(async move {
                    cache_invalidator
                        .invalidate_table_name(&Context::default(), table_name)
                        .await
                }),
            ),
            Some((meta, Instruction::InvalidateCaches(caches))) => (meta, async move {
                cache_invalidator
                    .invalidate(&Context::default(), caches)
                    .await
            }),
            _ => unreachable!("InvalidateTableCacheHandler: should be guarded by 'is_acceptable'"),
        };

@@ -22,7 +22,7 @@ use common_meta::heartbeat::handler::{
    HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutor,
};
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MessageMeta};
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
use common_meta::instruction::{CacheIdent, Instruction, InstructionReply, SimpleReply};
use common_meta::key::table_info::TableInfoKey;
use common_meta::key::TableMetaKey;
use partition::manager::TableRouteCacheInvalidator;
@@ -74,7 +74,7 @@ async fn test_invalidate_table_cache_handler() {
    handle_instruction(
        executor.clone(),
        mailbox.clone(),
        Instruction::InvalidateTableIdCache(table_id),
        Instruction::InvalidateCaches(vec![CacheIdent::TableId(table_id)]),
    )
    .await;

@@ -90,7 +90,12 @@ async fn test_invalidate_table_cache_handler() {
        .contains_key(&table_info_key.as_raw_key()));

    // removes a invalid key
    handle_instruction(executor, mailbox, Instruction::InvalidateTableIdCache(0)).await;
    handle_instruction(
        executor,
        mailbox,
        Instruction::InvalidateCaches(vec![CacheIdent::TableId(0)]),
    )
    .await;

    let (_, reply) = rx.recv().await.unwrap();
    assert_matches!(

@@ -473,7 +473,8 @@ pub fn check_permission(
        // These are executed by query engine, and will be checked there.
        Statement::Query(_) | Statement::Explain(_) | Statement::Tql(_) | Statement::Delete(_) => {}
        // database ops won't be checked
        Statement::CreateDatabase(_) | Statement::ShowDatabases(_) => {}
        Statement::CreateDatabase(_) | Statement::ShowDatabases(_) | Statement::DropDatabase(_) => {
        }
        // show create table and alter are not supported yet
        Statement::ShowCreateTable(_) | Statement::CreateExternalTable(_) | Statement::Alter(_) => {
        }

@@ -14,6 +14,7 @@

use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use client::Output;
use common_error::ext::BoxedError;
use servers::error::{AuthSnafu, Error};
use servers::influxdb::InfluxdbRequest;
@@ -30,7 +31,7 @@ impl InfluxdbLineProtocolHandler for Instance {
        &self,
        request: InfluxdbRequest,
        ctx: QueryContextRef,
    ) -> servers::error::Result<()> {
    ) -> servers::error::Result<Output> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
@@ -41,11 +42,9 @@ impl InfluxdbLineProtocolHandler for Instance {
        interceptor_ref.pre_execute(&request.lines, ctx.clone())?;

        let requests = request.try_into()?;
        let _ = self
            .handle_row_inserts(requests, ctx)
        self.handle_row_inserts(requests, ctx)
            .await
            .map_err(BoxedError::new)
            .context(servers::error::ExecuteGrpcQuerySnafu)?;
        Ok(())
            .context(servers::error::ExecuteGrpcQuerySnafu)
    }
}

@@ -14,14 +14,11 @@

use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use client::Output;
use common_error::ext::BoxedError;
use common_telemetry::tracing;
use opentelemetry_proto::tonic::collector::metrics::v1::{
    ExportMetricsServiceRequest, ExportMetricsServiceResponse,
};
use opentelemetry_proto::tonic::collector::trace::v1::{
    ExportTraceServiceRequest, ExportTraceServiceResponse,
};
use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest;
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
use servers::error::{self, AuthSnafu, Result as ServerResult};
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
use servers::otlp;
@@ -40,7 +37,7 @@ impl OpenTelemetryProtocolHandler for Instance {
        &self,
        request: ExportMetricsServiceRequest,
        ctx: QueryContextRef,
    ) -> ServerResult<ExportMetricsServiceResponse> {
    ) -> ServerResult<Output> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
@@ -53,19 +50,12 @@ impl OpenTelemetryProtocolHandler for Instance {
        interceptor_ref.pre_execute(ctx.clone())?;

        let (requests, rows) = otlp::metrics::to_grpc_insert_requests(request)?;
        let _ = self
            .handle_row_inserts(requests, ctx)
            .await
            .map_err(BoxedError::new)
            .context(error::ExecuteGrpcQuerySnafu)?;

        OTLP_METRICS_ROWS.inc_by(rows as u64);

        let resp = ExportMetricsServiceResponse {
            // TODO(sunng87): add support for partial_success in future patch
            partial_success: None,
        };
        Ok(resp)
        self.handle_row_inserts(requests, ctx)
            .await
            .map_err(BoxedError::new)
            .context(error::ExecuteGrpcQuerySnafu)
    }

    #[tracing::instrument(skip_all)]
@@ -73,7 +63,7 @@ impl OpenTelemetryProtocolHandler for Instance {
        &self,
        request: ExportTraceServiceRequest,
        ctx: QueryContextRef,
    ) -> ServerResult<ExportTraceServiceResponse> {
    ) -> ServerResult<Output> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
@@ -95,18 +85,11 @@ impl OpenTelemetryProtocolHandler for Instance {

        let (requests, rows) = otlp::trace::to_grpc_insert_requests(table_name, spans)?;

        let _ = self
            .handle_row_inserts(requests, ctx)
            .await
            .map_err(BoxedError::new)
            .context(error::ExecuteGrpcQuerySnafu)?;

        OTLP_TRACES_ROWS.inc_by(rows as u64);

        let resp = ExportTraceServiceResponse {
            // TODO(fys): add support for partial_success in future patch
            partial_success: None,
        };
        Ok(resp)
        self.handle_row_inserts(requests, ctx)
            .await
            .map_err(BoxedError::new)
            .context(error::ExecuteGrpcQuerySnafu)
    }
}

@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::sync::Arc;

use api::prom_store::remote::read_request::ResponseType;
use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse, WriteRequest};
use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse};
use api::v1::RowInsertRequests;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
@@ -30,6 +31,7 @@ use operator::insert::InserterRef;
use operator::statement::StatementExecutor;
use prost::Message;
use servers::error::{self, AuthSnafu, Result as ServerResult};
use servers::http::header::{collect_plan_metrics, CONTENT_ENCODING_SNAPPY, CONTENT_TYPE_PROTOBUF};
use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
use servers::interceptor::{PromStoreProtocolInterceptor, PromStoreProtocolInterceptorRef};
use servers::prom_store::{self, Metrics};
@@ -44,7 +46,6 @@ use crate::error::{
    TableNotFoundSnafu,
};
use crate::instance::Instance;
use crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES;

const SAMPLES_RESPONSE_TYPE: i32 = ResponseType::Samples as i32;

@@ -161,74 +162,34 @@ impl Instance {
#[async_trait]
impl PromStoreProtocolHandler for Instance {
    async fn write(
        &self,
        request: WriteRequest,
        ctx: QueryContextRef,
        with_metric_engine: bool,
    ) -> ServerResult<()> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
            .check_permission(ctx.current_user(), PermissionReq::PromStoreWrite)
            .context(AuthSnafu)?;
        let interceptor_ref = self
            .plugins
            .get::<PromStoreProtocolInterceptorRef<servers::error::Error>>();
        interceptor_ref.pre_write(&request, ctx.clone())?;

        let (requests, samples) = prom_store::to_grpc_row_insert_requests(&request)?;
        if with_metric_engine {
            let physical_table = ctx
                .extension(PHYSICAL_TABLE_PARAM)
                .unwrap_or(GREPTIME_PHYSICAL_TABLE)
                .to_string();
            let _ = self
                .handle_metric_row_inserts(requests, ctx.clone(), physical_table.to_string())
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)?;
        } else {
            let _ = self
                .handle_row_inserts(requests, ctx.clone())
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)?;
        }

        PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(samples as u64);
        Ok(())
    }

    async fn write_fast(
        &self,
        request: RowInsertRequests,
        ctx: QueryContextRef,
        with_metric_engine: bool,
    ) -> ServerResult<()> {
    ) -> ServerResult<Output> {
        self.plugins
            .get::<PermissionCheckerRef>()
            .as_ref()
            .check_permission(ctx.current_user(), PermissionReq::PromStoreWrite)
            .context(AuthSnafu)?;

        if with_metric_engine {
        let output = if with_metric_engine {
            let physical_table = ctx
                .extension(PHYSICAL_TABLE_PARAM)
                .unwrap_or(GREPTIME_PHYSICAL_TABLE)
                .to_string();
            let _ = self
                .handle_metric_row_inserts(request, ctx.clone(), physical_table.to_string())
            self.handle_metric_row_inserts(request, ctx.clone(), physical_table.to_string())
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)?;
                .context(error::ExecuteGrpcQuerySnafu)?
        } else {
            let _ = self
                .handle_row_inserts(request, ctx.clone())
            self.handle_row_inserts(request, ctx.clone())
                .await
                .map_err(BoxedError::new)
                .context(error::ExecuteGrpcQuerySnafu)?;
        }
        Ok(())
                .context(error::ExecuteGrpcQuerySnafu)?
        };

        Ok(output)
    }

    async fn read(
@@ -254,18 +215,29 @@ impl PromStoreProtocolHandler for Instance {
        match response_type {
            ResponseType::Samples => {
                let mut query_results = Vec::with_capacity(results.len());
                let mut map = HashMap::new();
                for (table_name, output) in results {
                    let plan = output.meta.plan.clone();
                    query_results.push(to_query_result(&table_name, output).await?);
                    if let Some(ref plan) = plan {
                        collect_plan_metrics(plan.clone(), &mut [&mut map]);
                    }
                }

                let response = ReadResponse {
                    results: query_results,
                };

                let resp_metrics = map
                    .into_iter()
                    .map(|(k, v)| (k, v.into()))
                    .collect::<HashMap<_, _>>();

                // TODO(dennis): may consume too much memory, adds flow control
                Ok(PromStoreResponse {
                    content_type: "application/x-protobuf".to_string(),
                    content_encoding: "snappy".to_string(),
                    content_type: CONTENT_TYPE_PROTOBUF.clone(),
                    content_encoding: CONTENT_ENCODING_SNAPPY.clone(),
                    resp_metrics,
                    body: prom_store::snappy_compress(&response.encode_to_vec())?,
                })
            }
@@ -306,31 +278,20 @@ impl ExportMetricHandler {
impl PromStoreProtocolHandler for ExportMetricHandler {
    async fn write(
        &self,
        request: WriteRequest,
        request: RowInsertRequests,
        ctx: QueryContextRef,
        _: bool,
    ) -> ServerResult<()> {
        let (requests, _) = prom_store::to_grpc_row_insert_requests(&request)?;
    ) -> ServerResult<Output> {
        self.inserter
            .handle_metric_row_inserts(
                requests,
                request,
                ctx,
                &self.statement_executor,
                GREPTIME_PHYSICAL_TABLE.to_string(),
            )
            .await
            .map_err(BoxedError::new)
            .context(error::ExecuteGrpcQuerySnafu)?;
        Ok(())
    }

    async fn write_fast(
        &self,
        _request: RowInsertRequests,
        _ctx: QueryContextRef,
        _with_metric_engine: bool,
    ) -> ServerResult<()> {
        unimplemented!()
            .context(error::ExecuteGrpcQuerySnafu)
    }

    async fn read(

@@ -18,7 +18,7 @@ use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use async_trait::async_trait;
use client::region::check_response_header;
use common_error::ext::BoxedError;
use common_meta::datanode_manager::{AffectedRows, Datanode, DatanodeManager, DatanodeRef};
use common_meta::datanode_manager::{Datanode, DatanodeManager, DatanodeRef, HandleResponse};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_meta::peer::Peer;
use common_recordbatch::SendableRecordBatchStream;
@@ -63,7 +63,7 @@ impl RegionInvoker {

#[async_trait]
impl Datanode for RegionInvoker {
    async fn handle(&self, request: RegionRequest) -> MetaResult<AffectedRows> {
    async fn handle(&self, request: RegionRequest) -> MetaResult<HandleResponse> {
        let span = request
            .header
            .as_ref()
@@ -76,10 +76,10 @@ impl Datanode for RegionInvoker {
            .await
            .map_err(BoxedError::new)
            .context(meta_error::ExternalSnafu)?;
        check_response_header(response.header)
        check_response_header(&response.header)
            .map_err(BoxedError::new)
            .context(meta_error::ExternalSnafu)?;
        Ok(response.affected_rows as _)
        Ok(HandleResponse::from_region_response(response))
    }

    async fn handle_query(&self, request: QueryRequest) -> MetaResult<SendableRecordBatchStream> {

@@ -41,13 +41,6 @@ lazy_static! {
        .with_label_values(&["insert"]);
    pub static ref EXECUTE_SCRIPT_ELAPSED: Histogram = HANDLE_SCRIPT_ELAPSED
        .with_label_values(&["execute"]);

    /// The samples count of Prometheus remote write.
    pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
        "greptime_frontend_prometheus_remote_write_samples",
        "frontend prometheus remote write samples"
    )
    .unwrap();
    pub static ref OTLP_METRICS_ROWS: IntCounter = register_int_counter!(
        "greptime_frontend_otlp_metrics_rows",
        "frontend otlp metrics rows"

@@ -17,10 +17,8 @@ use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{CacheInvalidator, Context};
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_meta::instruction::Instruction;
use common_meta::table_name::TableName;
use common_meta::instruction::{CacheIdent, Instruction};
use snafu::ResultExt;
use table::metadata::TableId;

use crate::metasrv::MetasrvInfo;
use crate::service::mailbox::{BroadcastChannel, MailboxRef};
@@ -65,13 +63,8 @@ impl MetasrvCacheInvalidator {

#[async_trait]
impl CacheInvalidator for MetasrvCacheInvalidator {
    async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
        let instruction = Instruction::InvalidateTableIdCache(table_id);
        self.broadcast(ctx, instruction).await
    }

    async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
        let instruction = Instruction::InvalidateTableNameCache(table_name);
    async fn invalidate(&self, ctx: &Context, caches: Vec<CacheIdent>) -> MetaResult<()> {
        let instruction = Instruction::InvalidateCaches(caches);
        self.broadcast(ctx, instruction).await
    }
}

|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse bool: {}", err_msg))]
|
||||
ParseBool {
|
||||
err_msg: String,
|
||||
#[snafu(source)]
|
||||
error: std::str::ParseBoolError,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid arguments: {}", err_msg))]
|
||||
InvalidArguments { err_msg: String, location: Location },
|
||||
|
||||
@@ -709,6 +717,7 @@ impl ErrorExt for Error {
|
||||
| Error::InvalidStatKey { .. }
|
||||
| Error::InvalidInactiveRegionKey { .. }
|
||||
| Error::ParseNum { .. }
|
||||
| Error::ParseBool { .. }
|
||||
| Error::ParseAddr { .. }
|
||||
| Error::ParseDuration { .. }
|
||||
| Error::UnsupportedSelectorType { .. }
|
||||
|
||||
@@ -107,6 +107,9 @@ impl HeartbeatHandler for RegionFailureHandler {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use common_meta::key::MAINTENANCE_KEY;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
@@ -163,4 +166,37 @@ mod tests {
|
||||
let dump = handler.failure_detect_runner.dump().await;
|
||||
assert_eq!(dump.iter().collect::<Vec<_>>().len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_maintenance_mode() {
|
||||
let region_failover_manager = create_region_failover_manager();
|
||||
let kv_backend = region_failover_manager.create_context().kv_backend.clone();
|
||||
let _handler = RegionFailureHandler::try_new(
|
||||
None,
|
||||
region_failover_manager.clone(),
|
||||
PhiAccrualFailureDetectorOptions::default(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let kv_req = common_meta::rpc::store::PutRequest {
|
||||
key: Vec::from(MAINTENANCE_KEY),
|
||||
value: vec![],
|
||||
prev_kv: false,
|
||||
};
|
||||
let _ = kv_backend.put(kv_req.clone()).await.unwrap();
|
||||
assert_matches!(
|
||||
region_failover_manager.is_maintenance_mode().await,
|
||||
Ok(true)
|
||||
);
|
||||
|
||||
let _ = kv_backend
|
||||
.delete(MAINTENANCE_KEY.as_bytes(), false)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_matches!(
|
||||
region_failover_manager.is_maintenance_mode().await,
|
||||
Ok(false)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,40 +140,59 @@ impl FailureDetectRunner {
|
||||
let election = self.election.clone();
|
||||
let region_failover_manager = self.region_failover_manager.clone();
|
||||
let runner_handle = common_runtime::spawn_bg(async move {
|
||||
async fn maybe_region_failover(
|
||||
failure_detectors: &Arc<FailureDetectorContainer>,
|
||||
region_failover_manager: &Arc<RegionFailoverManager>,
|
||||
) {
|
||||
match region_failover_manager.is_maintenance_mode().await {
|
||||
Ok(false) => {}
|
||||
Ok(true) => {
|
||||
info!("Maintenance mode is enabled, skip failover");
|
||||
return;
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to check maintenance mode");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let failed_regions = failure_detectors
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
// Intentionally not place `current_time_millis()` out of the iteration.
|
||||
// The failure detection determination should be happened "just in time",
|
||||
// i.e., failed or not has to be compared with the most recent "now".
|
||||
// Besides, it might reduce the false positive of failure detection,
|
||||
// because during the iteration, heartbeats are coming in as usual,
|
||||
// and the `phi`s are still updating.
|
||||
if !e.failure_detector().is_available(current_time_millis()) {
|
||||
Some(e.region_ident().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<RegionIdent>>();
|
||||
|
||||
for r in failed_regions {
|
||||
if let Err(e) = region_failover_manager.do_region_failover(&r).await {
|
||||
error!(e; "Failed to do region failover for {r}");
|
||||
} else {
|
||||
// Now that we know the region is starting to do failover, remove it
|
||||
// from the failure detectors, avoiding the failover procedure to be
|
||||
// triggered again.
|
||||
// If the region is back alive (the failover procedure runs successfully),
|
||||
// it will be added back to the failure detectors again.
|
||||
failure_detectors.remove(&r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
let start = Instant::now();
|
||||
|
||||
let is_leader = election.as_ref().map(|x| x.is_leader()).unwrap_or(true);
|
||||
if is_leader {
|
||||
let failed_regions = failure_detectors
|
||||
.iter()
|
||||
.filter_map(|e| {
|
||||
// Intentionally not place `current_time_millis()` out of the iteration.
|
||||
// The failure detection determination should be happened "just in time",
|
||||
// i.e., failed or not has to be compared with the most recent "now".
|
||||
// Besides, it might reduce the false positive of failure detection,
|
||||
// because during the iteration, heartbeats are coming in as usual,
|
||||
// and the `phi`s are still updating.
|
||||
if !e.failure_detector().is_available(current_time_millis()) {
|
||||
Some(e.region_ident().clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<RegionIdent>>();
|
||||
|
||||
for r in failed_regions {
|
||||
if let Err(e) = region_failover_manager.do_region_failover(&r).await {
|
||||
error!(e; "Failed to do region failover for {r}");
|
||||
} else {
|
||||
// Now that we know the region is starting to do failover, remove it
|
||||
// from the failure detectors, avoiding the failover procedure to be
|
||||
// triggered again.
|
||||
// If the region is back alive (the failover procedure runs successfully),
|
||||
// it will be added back to the failure detectors again.
|
||||
failure_detectors.remove(&r);
|
||||
}
|
||||
}
|
||||
maybe_region_failover(&failure_detectors, ®ion_failover_manager).await;
|
||||
}
|
||||
|
||||
let elapsed = Instant::now().duration_since(start);
|
||||
|
||||
@@ -43,7 +43,7 @@ use tokio::sync::broadcast::error::RecvError;
|
||||
use crate::cluster::MetaPeerClientRef;
|
||||
use crate::election::{Election, LeaderChangeMessage};
|
||||
use crate::error::{
|
||||
self, InitMetadataSnafu, Result, StartProcedureManagerSnafu, StartTelemetryTaskSnafu,
|
||||
InitMetadataSnafu, KvBackendSnafu, Result, StartProcedureManagerSnafu, StartTelemetryTaskSnafu,
|
||||
StopProcedureManagerSnafu,
|
||||
};
|
||||
use crate::failure_detector::PhiAccrualFailureDetectorOptions;
|
||||
@@ -357,7 +357,7 @@ impl MetaSrv {
|
||||
self.leader_cached_kv_backend
|
||||
.load()
|
||||
.await
|
||||
.context(error::KvBackendSnafu)?;
|
||||
.context(KvBackendSnafu)?;
|
||||
self.procedure_manager
|
||||
.start()
|
||||
.await
|
||||
|
||||
@@ -260,6 +260,7 @@ impl MetaSrvBuilder {
|
||||
let region_failover_manager = Arc::new(RegionFailoverManager::new(
|
||||
distributed_time_constants::REGION_LEASE_SECS,
|
||||
in_memory.clone(),
|
||||
kv_backend.clone(),
|
||||
mailbox.clone(),
|
||||
procedure_manager.clone(),
|
||||
(selector.clone(), selector_ctx.clone()),
|
||||
|
||||
@@ -26,9 +26,10 @@ use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_meta::key::datanode_table::DatanodeTableKey;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::kv_backend::ResettableKvBackendRef;
|
||||
use common_meta::lock_key::{RegionLock, TableLock};
|
||||
use common_meta::key::{TableMetadataManagerRef, MAINTENANCE_KEY};
|
||||
use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef};
|
||||
use common_meta::lock_key::{CatalogLock, RegionLock, SchemaLock, TableLock};
|
||||
use common_meta::table_name::TableName;
|
||||
use common_meta::{ClusterId, RegionIdent};
|
||||
use common_procedure::error::{
|
||||
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
|
||||
@@ -44,7 +45,9 @@ use snafu::ResultExt;
|
||||
use store_api::storage::{RegionId, RegionNumber};
|
||||
use table::metadata::TableId;
|
||||
|
||||
use crate::error::{RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu};
|
||||
use crate::error::{
|
||||
self, KvBackendSnafu, RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu,
|
||||
};
|
||||
use crate::lock::DistLockRef;
|
||||
use crate::metasrv::{SelectorContext, SelectorRef};
|
||||
use crate::service::mailbox::MailboxRef;
|
||||
@@ -72,6 +75,7 @@ impl From<RegionIdent> for RegionFailoverKey {
|
||||
pub(crate) struct RegionFailoverManager {
|
||||
region_lease_secs: u64,
|
||||
in_memory: ResettableKvBackendRef,
|
||||
kv_backend: KvBackendRef,
|
||||
mailbox: MailboxRef,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
selector: SelectorRef,
|
||||
@@ -93,9 +97,11 @@ impl Drop for FailoverProcedureGuard {
|
||||
}
|
||||
|
||||
impl RegionFailoverManager {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn new(
|
||||
region_lease_secs: u64,
|
||||
in_memory: ResettableKvBackendRef,
|
||||
kv_backend: KvBackendRef,
|
||||
mailbox: MailboxRef,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
(selector, selector_ctx): (SelectorRef, SelectorContext),
|
||||
@@ -105,6 +111,7 @@ impl RegionFailoverManager {
|
||||
Self {
|
||||
region_lease_secs,
|
||||
in_memory,
|
||||
kv_backend,
|
||||
mailbox,
|
||||
procedure_manager,
|
||||
selector,
|
||||
@@ -119,6 +126,7 @@ impl RegionFailoverManager {
|
||||
RegionFailoverContext {
|
||||
region_lease_secs: self.region_lease_secs,
|
||||
in_memory: self.in_memory.clone(),
|
||||
kv_backend: self.kv_backend.clone(),
|
||||
mailbox: self.mailbox.clone(),
|
||||
selector: self.selector.clone(),
|
||||
selector_ctx: self.selector_ctx.clone(),
|
||||
@@ -158,13 +166,27 @@ impl RegionFailoverManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn is_maintenance_mode(&self) -> Result<bool> {
|
||||
self.kv_backend
|
||||
.exists(MAINTENANCE_KEY.as_bytes())
|
||||
.await
|
||||
.context(KvBackendSnafu)
|
||||
}
|
||||
|
||||
pub(crate) async fn do_region_failover(&self, failed_region: &RegionIdent) -> Result<()> {
|
||||
let Some(guard) = self.insert_running_procedures(failed_region) else {
|
||||
warn!("Region failover procedure for region {failed_region} is already running!");
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if !self.table_exists(failed_region).await? {
|
||||
let table_info = self
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(failed_region.table_id)
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)?;
|
||||
|
||||
if table_info.is_none() {
|
||||
// The table could be dropped before the failure detector knows it. Then the region
|
||||
// failover is not needed.
|
||||
// Or the table could be renamed. But we will have a new region ident to detect failure.
|
||||
@@ -178,7 +200,15 @@ impl RegionFailoverManager {
|
||||
}
|
||||
|
||||
let context = self.create_context();
|
||||
let procedure = RegionFailoverProcedure::new(failed_region.clone(), context);
|
||||
// Safety: Check before.
|
||||
let table_info = table_info.unwrap();
|
||||
let TableName {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
..
|
||||
} = table_info.table_name();
|
||||
let procedure =
|
||||
RegionFailoverProcedure::new(catalog_name, schema_name, failed_region.clone(), context);
|
||||
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
|
||||
let procedure_id = procedure_with_id.id;
|
||||
info!("Starting region failover procedure {procedure_id} for region {failed_region:?}");
|
||||
@@ -206,16 +236,6 @@ impl RegionFailoverManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn table_exists(&self, failed_region: &RegionIdent) -> Result<bool> {
|
||||
Ok(self
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_region_distribution(failed_region.table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.is_some())
|
||||
}
|
||||
|
||||
async fn failed_region_exists(&self, failed_region: &RegionIdent) -> Result<bool> {
|
||||
let table_id = failed_region.table_id;
|
||||
let datanode_id = failed_region.datanode_id;
|
||||
@@ -238,10 +258,17 @@ impl RegionFailoverManager {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct LockMeta {
|
||||
catalog: String,
|
||||
schema: String,
|
||||
}
|
||||
|
||||
/// A "Node" in the state machine of region failover procedure.
|
||||
/// Contains the current state and the data.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct Node {
|
||||
lock_meta: LockMeta,
|
||||
failed_region: RegionIdent,
|
||||
state: Box<dyn State>,
|
||||
}
|
||||
@@ -251,6 +278,7 @@ struct Node {
|
||||
pub struct RegionFailoverContext {
|
||||
pub region_lease_secs: u64,
|
||||
pub in_memory: ResettableKvBackendRef,
|
||||
pub kv_backend: KvBackendRef,
|
||||
pub mailbox: MailboxRef,
|
||||
pub selector: SelectorRef,
|
||||
pub selector_ctx: SelectorContext,
|
||||
@@ -330,9 +358,15 @@ pub struct RegionFailoverProcedure {
|
||||
impl RegionFailoverProcedure {
|
||||
const TYPE_NAME: &'static str = "metasrv-procedure::RegionFailover";
|
||||
|
||||
pub fn new(failed_region: RegionIdent, context: RegionFailoverContext) -> Self {
|
||||
pub fn new(
|
||||
catalog: String,
|
||||
schema: String,
|
||||
failed_region: RegionIdent,
|
||||
context: RegionFailoverContext,
|
||||
) -> Self {
|
||||
let state = RegionFailoverStart::new();
|
||||
let node = Node {
|
||||
lock_meta: LockMeta { catalog, schema },
|
||||
failed_region,
|
||||
state: Box::new(state),
|
||||
};
|
||||
@@ -372,8 +406,9 @@ impl Procedure for RegionFailoverProcedure {
|
||||
|
||||
fn lock_key(&self) -> LockKey {
|
||||
let region_ident = &self.node.failed_region;
|
||||
// TODO(weny): acquires the catalog, schema read locks.
|
||||
let lock_key = vec![
|
||||
CatalogLock::Read(&self.node.lock_meta.catalog).into(),
|
||||
SchemaLock::read(&self.node.lock_meta.catalog, &self.node.lock_meta.catalog).into(),
|
||||
TableLock::Read(region_ident.table_id).into(),
|
||||
RegionLock::Write(RegionId::new(
|
||||
region_ident.table_id,
|
||||
@@ -549,6 +584,7 @@ mod tests {
|
||||
context: RegionFailoverContext {
|
||||
region_lease_secs: 10,
|
||||
in_memory,
|
||||
kv_backend,
|
||||
mailbox,
|
||||
selector,
|
||||
selector_ctx,
|
||||
@@ -568,6 +604,8 @@ mod tests {
|
||||
let failed_region = env.failed_region(1).await;
|
||||
|
||||
let mut procedure = Box::new(RegionFailoverProcedure::new(
|
||||
"greptime".into(),
|
||||
"public".into(),
|
||||
failed_region.clone(),
|
||||
env.context.clone(),
|
||||
)) as BoxedProcedure;
|
||||
@@ -671,7 +709,7 @@ mod tests {
|
||||
|
||||
assert_eq!(
|
||||
procedure.dump().unwrap(),
|
||||
r#"{"failed_region":{"cluster_id":0,"datanode_id":1,"table_id":1,"region_number":1,"engine":"mito2"},"state":{"region_failover_state":"RegionFailoverEnd"}}"#
|
||||
r#"{"lock_meta":{"catalog":"greptime","schema":"public"},"failed_region":{"cluster_id":0,"datanode_id":1,"table_id":1,"region_number":1,"engine":"mito2"},"state":{"region_failover_state":"RegionFailoverEnd"}}"#
|
||||
);
|
||||
|
||||
// Verifies that the failed region (region 1) is moved from failed datanode (datanode 1) to the candidate datanode.
|
||||
@@ -700,6 +738,10 @@ mod tests {
|
||||
|
||||
let state = RegionFailoverStart::new();
|
||||
let node = Node {
|
||||
lock_meta: LockMeta {
|
||||
catalog: "greptime".into(),
|
||||
schema: "public".into(),
|
||||
},
|
||||
failed_region,
|
||||
state: Box::new(state),
|
||||
};
|
||||
@@ -711,12 +753,12 @@ mod tests {
|
||||
let s = procedure.dump().unwrap();
|
||||
assert_eq!(
|
||||
s,
|
||||
r#"{"failed_region":{"cluster_id":0,"datanode_id":1,"table_id":1,"region_number":1,"engine":"mito2"},"state":{"region_failover_state":"RegionFailoverStart","failover_candidate":null}}"#
|
||||
r#"{"lock_meta":{"catalog":"greptime","schema":"public"},"failed_region":{"cluster_id":0,"datanode_id":1,"table_id":1,"region_number":1,"engine":"mito2"},"state":{"region_failover_state":"RegionFailoverStart","failover_candidate":null}}"#,
|
||||
);
|
||||
let n: Node = serde_json::from_str(&s).unwrap();
|
||||
assert_eq!(
|
||||
format!("{n:?}"),
|
||||
r#"Node { failed_region: RegionIdent { cluster_id: 0, datanode_id: 1, table_id: 1, region_number: 1, engine: "mito2" }, state: RegionFailoverStart { failover_candidate: None } }"#
|
||||
r#"Node { lock_meta: LockMeta { catalog: "greptime", schema: "public" }, failed_region: RegionIdent { cluster_id: 0, datanode_id: 1, table_id: 1, region_number: 1, engine: "mito2" }, state: RegionFailoverStart { failover_candidate: None } }"#,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -765,6 +807,10 @@ mod tests {
|
||||
|
||||
let state = RegionFailoverStart::new();
|
||||
let node = Node {
|
||||
lock_meta: LockMeta {
|
||||
catalog: "greptime".into(),
|
||||
schema: "public".into(),
|
||||
},
|
||||
failed_region,
|
||||
state: Box::new(state),
|
||||
};
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use api::v1::meta::MailboxMessage;
|
||||
use async_trait::async_trait;
|
||||
use common_meta::instruction::Instruction;
|
||||
use common_meta::instruction::{CacheIdent, Instruction};
|
||||
use common_meta::RegionIdent;
|
||||
use common_telemetry::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -35,7 +35,7 @@ impl InvalidateCache {
|
||||
ctx: &RegionFailoverContext,
|
||||
table_id: TableId,
|
||||
) -> Result<()> {
|
||||
let instruction = Instruction::InvalidateTableIdCache(table_id);
|
||||
let instruction = Instruction::InvalidateCaches(vec![CacheIdent::TableId(table_id)]);
|
||||
|
||||
let msg = &MailboxMessage::json_message(
|
||||
"Invalidate Table Cache",
|
||||
@@ -133,7 +133,10 @@ mod tests {
|
||||
assert_eq!(
|
||||
received.payload,
|
||||
Some(Payload::Json(
|
||||
serde_json::to_string(&Instruction::InvalidateTableIdCache(table_id)).unwrap(),
|
||||
serde_json::to_string(&Instruction::InvalidateCaches(vec![
|
||||
CacheIdent::TableId(table_id)
|
||||
]))
|
||||
.unwrap(),
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
@@ -13,8 +13,6 @@
 // limitations under the License.

 pub(crate) mod downgrade_leader_region;
-// TODO(weny): remove it.
-#[allow(dead_code)]
 pub(crate) mod manager;
 pub(crate) mod migration_abort;
 pub(crate) mod migration_end;
@@ -31,12 +29,12 @@ use std::time::Duration;

 use api::v1::meta::MailboxMessage;
 use common_error::ext::BoxedError;
-use common_meta::instruction::Instruction;
+use common_meta::instruction::{CacheIdent, Instruction};
 use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue};
 use common_meta::key::table_info::TableInfoValue;
 use common_meta::key::table_route::TableRouteValue;
 use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
-use common_meta::lock_key::{RegionLock, TableLock};
+use common_meta::lock_key::{CatalogLock, RegionLock, SchemaLock, TableLock};
 use common_meta::peer::Peer;
 use common_meta::region_keeper::{MemoryRegionKeeperRef, OperatingRegionGuard};
 use common_meta::ClusterId;
@@ -61,6 +59,10 @@ use crate::service::mailbox::{BroadcastChannel, MailboxRef};
 /// **Notes: Storing too much data in the context might incur replication overhead.**
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct PersistentContext {
+    /// The table catalog.
+    catalog: String,
+    /// The table schema.
+    schema: String,
     /// The Id of the cluster.
     cluster_id: ClusterId,
     /// The [Peer] of migration source.
@@ -81,8 +83,9 @@ fn default_replay_timeout() -> Duration {
 impl PersistentContext {
     pub fn lock_key(&self) -> Vec<StringKey> {
         let region_id = self.region_id;
-        // TODO(weny): acquires the catalog, schema read locks.
         let lock_key = vec![
+            CatalogLock::Read(&self.catalog).into(),
+            SchemaLock::read(&self.catalog, &self.schema).into(),
            TableLock::Read(region_id.table_id()).into(),
            RegionLock::Write(region_id).into(),
        ];
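
With the two added entries, the migration procedure now pins the whole metadata path of the region it moves. The intent, sketched below under the assumption that `Read` locks are shared and `Write` locks exclusive (which is how the lock types are used above): two migrations in the same schema can run concurrently, while a conflicting DDL that needs a write lock higher up the path has to wait.

// Shared (Read) locks at the catalog/schema/table levels, exclusive (Write)
// only on the migrating region:
//   CatalogLock::Read("greptime")           -- blocks DROP CATALOG, not other readers
//   SchemaLock::read("greptime", "public")  -- blocks DROP SCHEMA, not other readers
//   TableLock::Read(table_id)               -- blocks ALTER/DROP TABLE
//   RegionLock::Write(region_id)            -- serializes work on this region
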
@@ -185,8 +188,6 @@ impl ContextFactory for DefaultContextFactory {
     }
 }

-// TODO(weny): remove it.
-#[allow(dead_code)]
 /// The context of procedure execution.
 pub struct Context {
     persistent_ctx: PersistentContext,
@@ -320,7 +321,7 @@ impl Context {
     /// Broadcasts the invalidate table cache message.
     pub async fn invalidate_table_cache(&self) -> Result<()> {
         let table_id = self.region_id().table_id();
-        let instruction = Instruction::InvalidateTableIdCache(table_id);
+        let instruction = Instruction::InvalidateCaches(vec![CacheIdent::TableId(table_id)]);

         let msg = &MailboxMessage::json_message(
             "Invalidate Table Cache",
@@ -368,7 +369,6 @@ pub struct RegionMigrationProcedure {
     context: Context,
 }

-// TODO(weny): remove it.
 #[allow(dead_code)]
 impl RegionMigrationProcedure {
     const TYPE_NAME: &'static str = "metasrv-procedure::RegionMigration";
@@ -487,8 +487,7 @@ mod tests {
         let procedure = RegionMigrationProcedure::new(persistent_context, context);

         let serialized = procedure.dump().unwrap();

-        let expected = r#"{"persistent_ctx":{"cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105,"replay_timeout":"1s"},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
+        let expected = r#"{"persistent_ctx":{"catalog":"greptime","schema":"public","cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105,"replay_timeout":"1s"},"state":{"region_migration_state":"RegionMigrationStart"}}"#;
         assert_eq!(expected, serialized);
     }
@@ -496,7 +495,7 @@ mod tests {
     fn test_backward_compatibility() {
         let persistent_ctx = test_util::new_persistent_context(1, 2, RegionId::new(1024, 1));
         // NOTES: Changing this will break backward compatibility.
-        let serialized = r#"{"cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105}"#;
+        let serialized = r#"{"catalog":"greptime","schema":"public","cluster_id":0,"from_peer":{"id":1,"addr":""},"to_peer":{"id":2,"addr":""},"region_id":4398046511105}"#;
         let deserialized: PersistentContext = serde_json::from_str(serialized).unwrap();

         assert_eq!(persistent_ctx, deserialized);
@@ -583,7 +582,10 @@ mod tests {
         let msg = resp.mailbox_message.unwrap();

         let instruction = HeartbeatMailbox::json_instruction(&msg).unwrap();
-        assert_matches!(instruction, Instruction::InvalidateTableIdCache(1024));
+        assert_eq!(
+            instruction,
+            Instruction::InvalidateCaches(vec![CacheIdent::TableId(1024)])
+        );
     }

     fn procedure_flow_steps(from_peer_id: u64, to_peer_id: u64) -> Vec<Step> {
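
The backward-compatibility test passes because the two new fields may be absent from previously persisted procedure state. A plausible mechanism (an assumption for illustration; the real attributes live on `PersistentContext`) is serde field defaults:

#[derive(serde::Serialize, serde::Deserialize, Debug, PartialEq)]
struct PersistentCtxSketch {
    // Missing keys in old JSON fall back to the default catalog/schema.
    #[serde(default = "default_catalog")]
    catalog: String,
    #[serde(default = "default_schema")]
    schema: String,
    cluster_id: u64,
}

fn default_catalog() -> String {
    "greptime".to_string()
}

fn default_schema() -> String {
    "public".to_string()
}

// Old JSON without the new keys still deserializes:
// serde_json::from_str::<PersistentCtxSketch>(r#"{"cluster_id":0}"#)
// yields catalog = "greptime", schema = "public".
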
@@ -226,6 +226,8 @@ mod tests {

     fn new_persistent_context() -> PersistentContext {
         PersistentContext {
+            catalog: "greptime".into(),
+            schema: "public".into(),
             from_peer: Peer::empty(1),
             to_peer: Peer::empty(2),
             region_id: RegionId::new(1024, 1),

@@ -18,9 +18,11 @@ use std::fmt::Display;
 use std::sync::{Arc, RwLock};
 use std::time::Duration;

+use common_meta::key::table_info::TableInfoValue;
 use common_meta::key::table_route::TableRouteValue;
 use common_meta::peer::Peer;
 use common_meta::rpc::router::RegionRoute;
+use common_meta::table_name::TableName;
 use common_meta::ClusterId;
 use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithId};
 use common_telemetry::{error, info};
@@ -93,26 +95,6 @@ impl Display for RegionMigrationProcedureTask {
     }
 }

-impl From<RegionMigrationProcedureTask> for PersistentContext {
-    fn from(
-        RegionMigrationProcedureTask {
-            cluster_id,
-            region_id,
-            from_peer,
-            to_peer,
-            replay_timeout,
-        }: RegionMigrationProcedureTask,
-    ) -> Self {
-        PersistentContext {
-            cluster_id,
-            from_peer,
-            to_peer,
-            region_id,
-            replay_timeout,
-        }
-    }
-}
-
 impl RegionMigrationManager {
     /// Returns new [RegionMigrationManager]
     pub(crate) fn new(
@@ -188,6 +170,22 @@ impl RegionMigrationManager {
         Ok(table_route)
     }

+    async fn retrieve_table_info(&self, region_id: RegionId) -> Result<TableInfoValue> {
+        let table_info = self
+            .context_factory
+            .table_metadata_manager
+            .table_info_manager()
+            .get(region_id.table_id())
+            .await
+            .context(error::TableMetadataManagerSnafu)?
+            .context(error::TableInfoNotFoundSnafu {
+                table_id: region_id.table_id(),
+            })?
+            .into_inner();
+
+        Ok(table_info)
+    }
+
     /// Verifies the type of region migration table route.
     fn verify_table_route(
         &self,
@@ -279,8 +277,31 @@ impl RegionMigrationManager {

         self.verify_region_leader_peer(&region_route, &task)?;

-        let procedure =
-            RegionMigrationProcedure::new(task.clone().into(), self.context_factory.clone());
+        let table_info = self.retrieve_table_info(region_id).await?;
+        let TableName {
+            catalog_name,
+            schema_name,
+            ..
+        } = table_info.table_name();
+        let RegionMigrationProcedureTask {
+            cluster_id,
+            region_id,
+            from_peer,
+            to_peer,
+            replay_timeout,
+        } = task.clone();
+        let procedure = RegionMigrationProcedure::new(
+            PersistentContext {
+                catalog: catalog_name,
+                schema: schema_name,
+                cluster_id,
+                region_id,
+                from_peer,
+                to_peer,
+                replay_timeout,
+            },
+            self.context_factory.clone(),
+        );
         let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
         let procedure_id = procedure_with_id.id;
         info!("Starting region migration procedure {procedure_id} for {task}");
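
For context, a hedged sketch of how a caller assembles the task this method consumes; the field values are illustrative and the submitting method's name is an assumption:

let task = RegionMigrationProcedureTask {
    cluster_id: 0,
    region_id: RegionId::new(1024, 1),
    from_peer: Peer::empty(1),
    to_peer: Peer::empty(2),
    replay_timeout: Duration::from_secs(1),
};
// The manager resolves catalog/schema via retrieve_table_info() before
// building the PersistentContext, as shown above.
manager.submit_region_migration_procedure(task).await?; // method name assumed
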
@@ -278,6 +278,8 @@ pub fn send_mock_reply(
 /// Generates a [PersistentContext].
 pub fn new_persistent_context(from: u64, to: u64, region_id: RegionId) -> PersistentContext {
     PersistentContext {
+        catalog: "greptime".into(),
+        schema: "public".into(),
         from_peer: Peer::empty(from),
         to_peer: Peer::empty(to),
         region_id,

@@ -297,16 +299,6 @@ pub(crate) struct ProcedureMigrationTestSuite {
 pub(crate) type BeforeTest =
     Arc<dyn Fn(&mut ProcedureMigrationTestSuite) -> BoxFuture<'_, ()> + Send + Sync>;

-/// Custom assertion.
-pub(crate) type CustomAssertion = Arc<
-    dyn Fn(
-            &mut ProcedureMigrationTestSuite,
-            Result<(Box<dyn State>, Status)>,
-        ) -> BoxFuture<'_, Result<()>>
-        + Send
-        + Sync,
->;
-
 /// State assertion function.
 pub(crate) type StateAssertion = Arc<dyn Fn(&dyn State) + Send + Sync>;

@@ -316,14 +308,11 @@ pub(crate) type StatusAssertion = Arc<dyn Fn(Status) + Send + Sync>;
 /// Error assertion function.
 pub(crate) type ErrorAssertion = Arc<dyn Fn(Error) + Send + Sync>;

-// TODO(weny): Remove it.
-#[allow(dead_code)]
 /// The type of assertion.
 #[derive(Clone)]
 pub(crate) enum Assertion {
     Simple(StateAssertion, StatusAssertion),
     Error(ErrorAssertion),
-    Custom(CustomAssertion),
 }

 impl Assertion {
@@ -384,9 +373,6 @@ impl ProcedureMigrationTestSuite {
                 let error = result.unwrap_err();
                 error_assert(error);
             }
-            Assertion::Custom(assert_fn) => {
-                assert_fn(self, result).await?;
-            }
         }

         Ok(())
@@ -232,6 +232,8 @@ mod tests {

     fn new_persistent_context() -> PersistentContext {
         PersistentContext {
+            catalog: "greptime".into(),
+            schema: "public".into(),
             from_peer: Peer::empty(1),
             to_peer: Peer::empty(2),
             region_id: RegionId::new(1024, 1),

@@ -30,6 +30,7 @@ use common_meta::datanode_manager::DatanodeManagerRef;
 use common_meta::ddl::alter_table::AlterTableProcedure;
 use common_meta::ddl::create_logical_tables::{CreateLogicalTablesProcedure, CreateTablesState};
 use common_meta::ddl::create_table::*;
+use common_meta::ddl::drop_table::executor::DropTableExecutor;
 use common_meta::ddl::drop_table::DropTableProcedure;
 use common_meta::ddl::test_util::create_table::build_raw_table_info_from_expr;
 use common_meta::ddl::test_util::{TestColumnDefBuilder, TestCreateTableExprBuilder};
@@ -38,6 +39,7 @@ use common_meta::key::table_route::TableRouteValue;
 use common_meta::key::DeserializedValueWithBytes;
 use common_meta::rpc::ddl::{AlterTableTask, CreateTableTask, DropTableTask};
 use common_meta::rpc::router::{find_leaders, RegionRoute};
+use common_meta::table_name::TableName;
 use common_procedure::Status;
 use store_api::storage::RegionId;
@@ -322,7 +324,11 @@ async fn test_on_datanode_drop_regions() {
         table_id: 42,
         drop_if_exists: false,
     };

+    let drop_table_executor = DropTableExecutor::new(
+        TableName::new("my_catalog", "my_schema", "my_table"),
+        42,
+        false,
+    );
     let (region_server, mut rx) = EchoRegionServer::new();
     let region_routes = test_data::new_region_routes();
     let datanode_manager = new_datanode_manager(&region_server, &region_routes).await;
@@ -357,7 +363,10 @@ async fn test_on_datanode_drop_regions() {
         }
     });

-    let status = procedure.on_datanode_drop_regions().await.unwrap();
+    let status = procedure
+        .on_datanode_drop_regions(&drop_table_executor)
+        .await
+        .unwrap();
     assert!(status.is_done());

     handle.await.unwrap();

@@ -93,6 +93,7 @@ pub mod mock {
             }),
         }),
         affected_rows: 0,
+        extension: Default::default(),
     })
 }
 }
@@ -15,10 +15,9 @@
 mod health;
 mod heartbeat;
 mod leader;
+mod maintenance;
 mod meta;
-// TODO(weny): removes it.
 mod node_lease;
-#[allow(dead_code)]
 mod region_migration;
 mod route;
 mod util;
@@ -99,6 +98,13 @@ pub fn make_admin_service(meta_srv: MetaSrv) -> Admin {
     };
     let router = router.route("/region-migration", handler);

+    let handler = maintenance::MaintenanceHandler {
+        kv_backend: meta_srv.kv_backend().clone(),
+    };
+    let router = router
+        .route("/maintenance", handler.clone())
+        .route("/maintenance/set", handler);
+
     let router = Router::nest("/admin", router);

     Admin::new(router)
src/meta-srv/src/service/admin/maintenance.rs (new file, 103 lines)
@@ -0,0 +1,103 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;

use common_meta::key::MAINTENANCE_KEY;
use common_meta::kv_backend::KvBackendRef;
use common_meta::rpc::store::PutRequest;
use snafu::{OptionExt, ResultExt};
use tonic::codegen::http;
use tonic::codegen::http::Response;

use crate::error::{
    InvalidHttpBodySnafu, KvBackendSnafu, MissingRequiredParameterSnafu, ParseBoolSnafu,
};
use crate::service::admin::HttpHandler;

#[derive(Clone)]
pub struct MaintenanceHandler {
    pub kv_backend: KvBackendRef,
}

impl MaintenanceHandler {
    async fn get_maintenance(&self) -> crate::Result<Response<String>> {
        let enabled = self
            .kv_backend
            .exists(MAINTENANCE_KEY.as_bytes())
            .await
            .context(KvBackendSnafu)?;
        let response = if enabled {
            "Maintenance mode is enabled"
        } else {
            "Maintenance mode is disabled"
        };
        http::Response::builder()
            .status(http::StatusCode::OK)
            .body(response.into())
            .context(InvalidHttpBodySnafu)
    }

    async fn set_maintenance(
        &self,
        params: &HashMap<String, String>,
    ) -> crate::Result<Response<String>> {
        let enable = params
            .get("enable")
            .map(|v| v.parse::<bool>())
            .context(MissingRequiredParameterSnafu { param: "enable" })?
            .context(ParseBoolSnafu {
                err_msg: "'enable' must be 'true' or 'false'",
            })?;

        let response = if enable {
            let req = PutRequest {
                key: Vec::from(MAINTENANCE_KEY),
                value: vec![],
                prev_kv: false,
            };
            self.kv_backend
                .put(req.clone())
                .await
                .context(KvBackendSnafu)?;
            "Maintenance mode enabled"
        } else {
            self.kv_backend
                .delete(MAINTENANCE_KEY.as_bytes(), false)
                .await
                .context(KvBackendSnafu)?;
            "Maintenance mode disabled"
        };

        http::Response::builder()
            .status(http::StatusCode::OK)
            .body(response.into())
            .context(InvalidHttpBodySnafu)
    }
}

#[async_trait::async_trait]
impl HttpHandler for MaintenanceHandler {
    async fn handle(
        &self,
        path: &str,
        params: &HashMap<String, String>,
    ) -> crate::Result<Response<String>> {
        if path.ends_with("/set") {
            self.set_maintenance(params).await
        } else {
            self.get_maintenance().await
        }
    }
}
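
Usage sketch for the new endpoints once they are nested under /admin (host and port are placeholders for the metasrv HTTP listen address; the handler only inspects the path suffix and the `enable` parameter):

curl 'http://127.0.0.1:3002/admin/maintenance'                   # report current state
curl 'http://127.0.0.1:3002/admin/maintenance/set?enable=true'   # enter maintenance mode
curl 'http://127.0.0.1:3002/admin/maintenance/set?enable=false'  # leave maintenance mode
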
@@ -86,6 +86,7 @@ pub(crate) fn create_region_failover_manager() -> Arc<RegionFailoverManager> {
     Arc::new(RegionFailoverManager::new(
+        10,
         in_memory,
         kv_backend.clone(),
         mailbox,
         procedure_manager,
         (selector, selector_ctx),
@@ -58,18 +58,19 @@ impl DataRegion {
     /// Invokers don't need to set up or verify the column id. This method will adjust
     /// it using the underlying schema.
     ///
-    /// This method will also set the nullable marker to true.
+    /// This method will also set the nullable marker to true. All of these changes are
+    /// applied to `columns` in-place.
     pub async fn add_columns(
         &self,
         region_id: RegionId,
-        columns: Vec<ColumnMetadata>,
+        columns: &mut [ColumnMetadata],
     ) -> Result<()> {
         let region_id = utils::to_data_region_id(region_id);

         let mut retries = 0;
         // submit alter request
         while retries < MAX_RETRIES {
-            let request = self.assemble_alter_request(region_id, &columns).await?;
+            let request = self.assemble_alter_request(region_id, columns).await?;

             let _timer = MITO_DDL_DURATION.start_timer();

@@ -90,10 +91,12 @@ impl DataRegion {
         Ok(())
     }

     /// Generate a wrapped [RegionAlterRequest] with the given [ColumnMetadata].
+    /// This method will modify `columns` in-place.
     async fn assemble_alter_request(
         &self,
         region_id: RegionId,
-        columns: &[ColumnMetadata],
+        columns: &mut [ColumnMetadata],
     ) -> Result<RegionRequest> {
         // retrieve underlying version
         let region_metadata = self
@@ -118,15 +121,14 @@ impl DataRegion {
             .unwrap_or(0);

         // overwrite semantic type
-        let columns = columns
-            .iter()
+        let new_columns = columns
+            .iter_mut()
             .enumerate()
             .map(|(delta, c)| {
-                let mut c = c.clone();
                 if c.semantic_type == SemanticType::Tag {
                     if !c.column_schema.data_type.is_string() {
                         return ColumnTypeMismatchSnafu {
-                            column_type: c.column_schema.data_type,
+                            column_type: c.column_schema.data_type.clone(),
                         }
                         .fail();
                     }
@@ -138,11 +140,10 @@ impl DataRegion {
                 };

                 c.column_id = new_column_id_start + delta as u32;
-
-                c.column_schema = c.column_schema.with_nullable_set();
+                c.column_schema.set_nullable();

                 Ok(AddColumn {
-                    column_metadata: c,
+                    column_metadata: c.clone(),
                     location: None,
                 })
             })
@@ -151,7 +152,9 @@ impl DataRegion {
         // assemble alter request
         let alter_request = RegionRequest::Alter(RegionAlterRequest {
             schema_version: version,
-            kind: AlterKind::AddColumns { columns },
+            kind: AlterKind::AddColumns {
+                columns: new_columns,
+            },
         });

         Ok(alter_request)
@@ -167,6 +170,7 @@ impl DataRegion {
             .handle_request(region_id, RegionRequest::Put(request))
             .await
             .context(MitoWriteOperationSnafu)
+            .map(|result| result.affected_rows)
     }

     pub async fn physical_columns(
@@ -205,7 +209,7 @@ mod test {
         // TestEnv will create a logical region which changes the version to 1.
         assert_eq!(current_version, 1);

-        let new_columns = vec![
+        let mut new_columns = vec![
             ColumnMetadata {
                 column_id: 0,
                 semantic_type: SemanticType::Tag,
@@ -226,7 +230,7 @@ mod test {
             },
         ];
         env.data_region()
-            .add_columns(env.default_physical_region_id(), new_columns)
+            .add_columns(env.default_physical_region_id(), &mut new_columns)
             .await
             .unwrap();

@@ -258,14 +262,14 @@ mod test {
         let env = TestEnv::new().await;
         env.init_metric_region().await;

-        let new_columns = vec![ColumnMetadata {
+        let mut new_columns = vec![ColumnMetadata {
             column_id: 0,
             semantic_type: SemanticType::Tag,
             column_schema: ColumnSchema::new("tag2", ConcreteDataType::int64_datatype(), false),
         }];
         let result = env
             .data_region()
-            .add_columns(env.default_physical_region_id(), new_columns)
+            .add_columns(env.default_physical_region_id(), &mut new_columns)
             .await;
         assert!(result.is_err());
     }
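
The signature change from `Vec<ColumnMetadata>` to `&mut [ColumnMetadata]` makes the id and nullability adjustments visible to the caller instead of being lost inside a consumed vector. A small usage sketch (`build_new_columns` is a hypothetical helper):

let mut columns: Vec<ColumnMetadata> = build_new_columns(); // hypothetical helper
data_region.add_columns(region_id, &mut columns).await?;
// `columns` now carries the column ids assigned against the physical region's
// schema, so the caller can register exactly those metadata entries elsewhere
// without re-reading the region.
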
@@ -24,6 +24,7 @@ mod region_metadata;
 mod state;

 use std::any::Any;
+use std::collections::HashMap;
 use std::sync::{Arc, RwLock};

 use async_trait::async_trait;
@@ -33,13 +34,13 @@ use common_recordbatch::SendableRecordBatchStream;
 use mito2::engine::MitoEngine;
 use store_api::metadata::RegionMetadataRef;
 use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
-use store_api::region_engine::{RegionEngine, RegionRole, SetReadonlyResponse};
-use store_api::region_request::{AffectedRows, RegionRequest};
+use store_api::region_engine::{RegionEngine, RegionHandleResult, RegionRole, SetReadonlyResponse};
+use store_api::region_request::RegionRequest;
 use store_api::storage::{RegionId, ScanRequest};

 use self::state::MetricEngineState;
 use crate::data_region::DataRegion;
-use crate::error::Result;
+use crate::error::{Result, UnsupportedRegionRequestSnafu};
 use crate::metadata_region::MetadataRegion;
 use crate::utils;
@@ -121,23 +122,39 @@ impl RegionEngine for MetricEngine {
         &self,
         region_id: RegionId,
         request: RegionRequest,
-    ) -> Result<AffectedRows, BoxedError> {
+    ) -> Result<RegionHandleResult, BoxedError> {
+        let mut extension_return_value = HashMap::new();
+
         let result = match request {
             RegionRequest::Put(put) => self.inner.put_region(region_id, put).await,
-            RegionRequest::Delete(_) => todo!(),
-            RegionRequest::Create(create) => self.inner.create_region(region_id, create).await,
+            RegionRequest::Create(create) => {
+                self.inner
+                    .create_region(region_id, create, &mut extension_return_value)
+                    .await
+            }
             RegionRequest::Drop(drop) => self.inner.drop_region(region_id, drop).await,
             RegionRequest::Open(open) => self.inner.open_region(region_id, open).await,
             RegionRequest::Close(close) => self.inner.close_region(region_id, close).await,
-            RegionRequest::Alter(alter) => self.inner.alter_region(region_id, alter).await,
-            RegionRequest::Flush(_) => todo!(),
-            RegionRequest::Compact(_) => todo!(),
-            RegionRequest::Truncate(_) => todo!(),
+            RegionRequest::Alter(alter) => {
+                self.inner
+                    .alter_region(region_id, alter, &mut extension_return_value)
+                    .await
+            }
+            RegionRequest::Delete(_)
+            | RegionRequest::Flush(_)
+            | RegionRequest::Compact(_)
+            | RegionRequest::Truncate(_) => UnsupportedRegionRequestSnafu { request }.fail(),
+            // It always returns Ok(0); all data is already the latest.
+            RegionRequest::Catchup(_) => Ok(0),
         };

-        result.map_err(BoxedError::new)
+        // TODO: pass extension
+        result
+            .map_err(BoxedError::new)
+            .map(|rows| RegionHandleResult {
+                affected_rows: rows,
+                extension: extension_return_value,
+            })
     }

     /// Handles a substrait query and returns a stream of record batches
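
On the other side of the extension map, a consumer can recover the physical schema returned by create/alter. A hedged sketch; `decode_list` is an assumed counterpart of the `ColumnMetadata::encode_list` call used in this diff:

let result = engine.handle_request(region_id, request).await?;
if let Some(bytes) = result.extension.get(ALTER_PHYSICAL_EXTENSION_KEY) {
    // Assumed API, mirroring encode_list above.
    let physical_columns = ColumnMetadata::decode_list(bytes)?;
    // ... refresh the cached table schema with `physical_columns` ...
}
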
@@ -12,13 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
+
 use common_telemetry::{error, info};
-use snafu::OptionExt;
+use snafu::{OptionExt, ResultExt};
+use store_api::metadata::ColumnMetadata;
+use store_api::metric_engine_consts::ALTER_PHYSICAL_EXTENSION_KEY;
 use store_api::region_request::{AffectedRows, AlterKind, RegionAlterRequest};
 use store_api::storage::RegionId;

 use crate::engine::MetricEngineInner;
-use crate::error::{ForbiddenPhysicalAlterSnafu, LogicalRegionNotFoundSnafu, Result};
+use crate::error::{
+    ForbiddenPhysicalAlterSnafu, LogicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu,
+};
 use crate::metrics::FORBIDDEN_OPERATION_COUNT;
 use crate::utils::{to_data_region_id, to_metadata_region_id};

@@ -28,23 +34,39 @@ impl MetricEngineInner {
         &self,
         region_id: RegionId,
         request: RegionAlterRequest,
+        extension_return_value: &mut HashMap<String, Vec<u8>>,
     ) -> Result<AffectedRows> {
         let is_altering_physical_region = self.is_physical_region(region_id);

         let result = if is_altering_physical_region {
             self.alter_physical_region(region_id, request).await
         } else {
-            self.alter_logical_region(region_id, request).await
+            let physical_region_id = self.alter_logical_region(region_id, request).await?;
+
+            // Add the physical table's columns to the extension map.
+            // It's ok to overwrite an existing key, as the later-arriving schema is more up-to-date.
+            let physical_columns = self
+                .data_region
+                .physical_columns(physical_region_id)
+                .await?;
+            extension_return_value.insert(
+                ALTER_PHYSICAL_EXTENSION_KEY.to_string(),
+                ColumnMetadata::encode_list(&physical_columns)
+                    .context(SerializeColumnMetadataSnafu)?,
+            );
+
+            Ok(())
         };

         result.map(|_| 0)
     }

+    /// Returns the physical region id behind this logical region.
     async fn alter_logical_region(
         &self,
         region_id: RegionId,
         request: RegionAlterRequest,
-    ) -> Result<()> {
+    ) -> Result<RegionId> {
         let physical_region_id = {
             let state = &self.state.read().unwrap();
             state.get_physical_region_id(region_id).with_context(|| {
@@ -55,7 +77,7 @@ impl MetricEngineInner {

         // only handle adding columns
         let AlterKind::AddColumns { columns } = request.kind else {
-            return Ok(());
+            return Ok(physical_region_id);
         };

         let metadata_region_id = to_metadata_region_id(physical_region_id);
@@ -92,7 +114,7 @@ impl MetricEngineInner {
                 .await?;
         }

-        Ok(())
+        Ok(physical_region_id)
     }

     async fn alter_physical_region(
@@ -15,6 +15,7 @@
 use std::collections::{HashMap, HashSet};

 use api::v1::SemanticType;
+use common_error::ext::BoxedError;
 use common_telemetry::info;
 use common_time::Timestamp;
 use datatypes::data_type::ConcreteDataType;
@@ -25,22 +26,26 @@ use object_store::util::join_dir;
 use snafu::{ensure, OptionExt, ResultExt};
 use store_api::metadata::ColumnMetadata;
 use store_api::metric_engine_consts::{
-    DATA_REGION_SUBDIR, DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
-    LOGICAL_TABLE_METADATA_KEY, METADATA_REGION_SUBDIR, METADATA_SCHEMA_KEY_COLUMN_INDEX,
-    METADATA_SCHEMA_KEY_COLUMN_NAME, METADATA_SCHEMA_TIMESTAMP_COLUMN_INDEX,
-    METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME, METADATA_SCHEMA_VALUE_COLUMN_INDEX,
-    METADATA_SCHEMA_VALUE_COLUMN_NAME, PHYSICAL_TABLE_METADATA_KEY,
+    ALTER_PHYSICAL_EXTENSION_KEY, DATA_REGION_SUBDIR, DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
+    DATA_SCHEMA_TSID_COLUMN_NAME, LOGICAL_TABLE_METADATA_KEY, METADATA_REGION_SUBDIR,
+    METADATA_SCHEMA_KEY_COLUMN_INDEX, METADATA_SCHEMA_KEY_COLUMN_NAME,
+    METADATA_SCHEMA_TIMESTAMP_COLUMN_INDEX, METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
+    METADATA_SCHEMA_VALUE_COLUMN_INDEX, METADATA_SCHEMA_VALUE_COLUMN_NAME,
+    PHYSICAL_TABLE_METADATA_KEY,
 };
 use store_api::region_engine::RegionEngine;
 use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest};
 use store_api::storage::consts::ReservedColumnId;
 use store_api::storage::RegionId;

-use crate::engine::options::set_index_options_for_data_region;
+use crate::engine::options::{
+    set_index_options_for_data_region, set_memtable_options_for_data_region,
+};
 use crate::engine::MetricEngineInner;
 use crate::error::{
-    ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu,
-    MissingRegionOptionSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, Result,
+    ColumnNotFoundSnafu, ConflictRegionOptionSnafu, CreateMitoRegionSnafu,
+    InternalColumnOccupiedSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu,
+    ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu,
 };
 use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT};
 use crate::utils::{to_data_region_id, to_metadata_region_id};
@@ -51,13 +56,28 @@ impl MetricEngineInner {
         &self,
         region_id: RegionId,
         request: RegionCreateRequest,
+        extension_return_value: &mut HashMap<String, Vec<u8>>,
     ) -> Result<AffectedRows> {
         Self::verify_region_create_request(&request)?;

         let result = if request.options.contains_key(PHYSICAL_TABLE_METADATA_KEY) {
             self.create_physical_region(region_id, request).await
         } else if request.options.contains_key(LOGICAL_TABLE_METADATA_KEY) {
-            self.create_logical_region(region_id, request).await
+            let physical_region_id = self.create_logical_region(region_id, request).await?;
+
+            // Add the physical table's columns to the extension map.
+            // It's ok to overwrite an existing key, as the later-arriving schema is more up-to-date.
+            let physical_columns = self
+                .data_region
+                .physical_columns(physical_region_id)
+                .await?;
+            extension_return_value.insert(
+                ALTER_PHYSICAL_EXTENSION_KEY.to_string(),
+                ColumnMetadata::encode_list(&physical_columns)
+                    .context(SerializeColumnMetadataSnafu)?,
+            );
+
+            Ok(())
         } else {
             MissingRegionOptionSnafu {}.fail()
         };
@@ -124,11 +144,16 @@ impl MetricEngineInner {
     /// This method will alter the data region to add columns if necessary.
     ///
     /// If the logical region to create already exists, this method will do nothing.
+    ///
+    /// `alter_request` is a hashmap that stores the alter requests that were executed
+    /// against the physical region.
+    ///
+    /// Returns the physical region id of this logical region.
     async fn create_logical_region(
         &self,
         logical_region_id: RegionId,
         request: RegionCreateRequest,
-    ) -> Result<()> {
+    ) -> Result<RegionId> {
         // transform IDs
         let physical_region_id_raw = request
             .options
@@ -149,11 +174,12 @@ impl MetricEngineInner {
             .await?
         {
             info!("Create an existing logical region {logical_region_id}. Skipped");
-            return Ok(());
+            return Ok(data_region_id);
         }

         // find new columns to add
         let mut new_columns = vec![];
+        let mut existing_columns = vec![];
         {
             let state = &self.state.read().unwrap();
             let physical_columns =
@@ -166,6 +192,8 @@ impl MetricEngineInner {
             for col in &request.column_metadatas {
                 if !physical_columns.contains(&col.column_schema.name) {
                     new_columns.push(col.clone());
+                } else {
+                    existing_columns.push(col.column_schema.name.clone());
                 }
             }
         }
@@ -186,9 +214,28 @@ impl MetricEngineInner {
         self.metadata_region
             .add_logical_region(metadata_region_id, logical_region_id)
             .await?;
-        for col in &request.column_metadatas {
+
+        // register existing physical columns to this new logical region.
+        let physical_schema = self
+            .data_region
+            .physical_columns(data_region_id)
+            .await
+            .map_err(BoxedError::new)
+            .context(MitoReadOperationSnafu)?;
+        let physical_schema_map = physical_schema
+            .into_iter()
+            .map(|metadata| (metadata.column_schema.name.clone(), metadata))
+            .collect::<HashMap<_, _>>();
+        for col in &existing_columns {
+            let column_metadata = physical_schema_map
+                .get(col)
+                .with_context(|| ColumnNotFoundSnafu {
+                    name: col,
+                    region_id: physical_region_id,
+                })?
+                .clone();
             self.metadata_region
-                .add_column(metadata_region_id, logical_region_id, col)
+                .add_column(metadata_region_id, logical_region_id, &column_metadata)
                 .await?;
         }
@@ -201,19 +248,21 @@ impl MetricEngineInner {
         info!("Created new logical region {logical_region_id} on physical region {data_region_id}");
         LOGICAL_REGION_COUNT.inc();

-        Ok(())
+        Ok(data_region_id)
     }

+    /// Execute the corresponding alter requests to the mito region. Newly added columns'
+    /// [ColumnMetadata] will be cloned into `added_columns`.
     pub(crate) async fn add_columns_to_physical_data_region(
         &self,
         data_region_id: RegionId,
         metadata_region_id: RegionId,
         logical_region_id: RegionId,
-        new_columns: Vec<ColumnMetadata>,
+        mut new_columns: Vec<ColumnMetadata>,
     ) -> Result<()> {
         // alter data region
         self.data_region
-            .add_columns(data_region_id, new_columns.clone())
+            .add_columns(data_region_id, &mut new_columns)
             .await?;

         // register columns to metadata region

@@ -360,13 +409,13 @@ impl MetricEngineInner {
         // concat region dir
         data_region_request.region_dir = join_dir(&request.region_dir, DATA_REGION_SUBDIR);

-        // convert semantic type
+        // change nullability for tag columns
         data_region_request
             .column_metadatas
             .iter_mut()
             .for_each(|metadata| {
                 if metadata.semantic_type == SemanticType::Tag {
                     metadata.semantic_type = SemanticType::Field;
                     metadata.column_schema.set_nullable();
                 }
             });

@@ -380,6 +429,9 @@ impl MetricEngineInner {
         // set index options
         set_index_options_for_data_region(&mut data_region_request.options);

+        // Set memtable options.
+        set_memtable_options_for_data_region(&mut data_region_request.options);
+
         data_region_request
     }
@@ -42,3 +42,8 @@ pub fn set_index_options_for_data_region(options: &mut HashMap<String, String>)
         SEG_ROW_COUNT_FOR_DATA_REGION.to_string(),
     );
 }
+
+/// Set memtable options for the data region.
+pub fn set_memtable_options_for_data_region(options: &mut HashMap<String, String>) {
+    options.insert("memtable.type".to_string(), "partition_tree".to_string());
+}
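
Usage sketch: after the helper runs, the create request for the data region carries the memtable type that the tests later assert on:

use std::collections::HashMap;

let mut options: HashMap<String, String> = HashMap::new();
set_memtable_options_for_data_region(&mut options);
assert_eq!(
    options.get("memtable.type").map(String::as_str),
    Some("partition_tree")
);
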
@@ -215,12 +215,12 @@ mod tests {

         // write data
         let logical_region_id = env.default_logical_region_id();
-        let count = env
+        let result = env
             .metric()
             .handle_request(logical_region_id, request)
             .await
             .unwrap();
-        assert_eq!(count, 5);
+        assert_eq!(result.affected_rows, 5);

         // read data from physical region
         let physical_region_id = env.default_physical_region_id();
@@ -287,11 +287,11 @@ mod tests {
         });

         // write data
-        let count = engine
+        let result = engine
             .handle_request(logical_region_id, request)
             .await
             .unwrap();
-        assert_eq!(100, count);
+        assert_eq!(100, result.affected_rows);
     }

     #[tokio::test]
@@ -143,6 +143,7 @@ impl MetricEngineInner {
             self.default_projection(physical_region_id, logical_region_id)
                 .await?
         };
+
         request.projection = Some(physical_projection);

         // add table filter
@@ -186,6 +187,7 @@ impl MetricEngineInner {
             .get_metadata(data_region_id)
             .await
             .context(MitoReadOperationSnafu)?;
+
         for name in projected_logical_names {
             // Safety: logical columns are a strict subset of physical columns
             physical_projection.push(physical_metadata.column_index_by_name(&name).unwrap());
@@ -301,7 +303,7 @@ mod test {
             .await
             .unwrap();

-        assert_eq!(scan_req.projection.unwrap(), vec![0, 1, 4, 8, 9, 10, 11]);
+        assert_eq!(scan_req.projection.unwrap(), vec![11, 10, 9, 8, 0, 1, 4]);
         assert_eq!(scan_req.filters.len(), 1);
         assert_eq!(
             scan_req.filters[0],
@@ -318,6 +320,6 @@ mod test {
             .transform_request(physical_region_id, logical_region_id, scan_req)
             .await
             .unwrap();
-        assert_eq!(scan_req.projection.unwrap(), vec![0, 1, 4, 8, 9, 10, 11]);
+        assert_eq!(scan_req.projection.unwrap(), vec![11, 10, 9, 8, 0, 1, 4]);
     }
 }
@@ -39,7 +39,8 @@ impl MetricEngineInner {
             .collect::<Vec<_>>();

         // sort columns on column name to ensure a deterministic order
-        logical_column_metadata.sort_unstable_by_key(|col| col.column_id);
+        logical_column_metadata
+            .sort_unstable_by(|c1, c2| c1.column_schema.name.cmp(&c2.column_schema.name));

         Ok(logical_column_metadata)
     }
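
Sorting by name rather than by column id is what reorders the projections asserted in the read tests above. A stripped-down illustration, with columns reduced to (name, id) pairs for brevity:

let mut cols = vec![("host", 11u32), ("cpu", 8), ("ts", 0)];
cols.sort_unstable_by_key(|c| c.1);          // old order: by id   -> ts, cpu, host
cols.sort_unstable_by(|a, b| a.0.cmp(b.0));  // new order: by name -> cpu, host, ts
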
@@ -19,6 +19,7 @@ use common_error::status_code::StatusCode;
 use common_macro::stack_trace_debug;
 use datatypes::prelude::ConcreteDataType;
 use snafu::{Location, Snafu};
+use store_api::region_request::RegionRequest;
 use store_api::storage::RegionId;

 #[derive(Snafu)]
@@ -71,6 +72,13 @@ pub enum Error {
         location: Location,
     },

+    #[snafu(display("Failed to serialize column metadata"))]
+    SerializeColumnMetadata {
+        #[snafu(source)]
+        error: serde_json::Error,
+        location: Location,
+    },
+
     #[snafu(display("Failed to decode base64 column value"))]
     DecodeColumnValue {
         #[snafu(source)]
@@ -155,6 +163,12 @@ pub enum Error {
         region_id: RegionId,
         location: Location,
     },
+
+    #[snafu(display("Unsupported region request: {}", request))]
+    UnsupportedRegionRequest {
+        request: RegionRequest,
+        location: Location,
+    },
 }

 pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -170,11 +184,14 @@ impl ErrorExt for Error {
             | ColumnTypeMismatch { .. }
             | PhysicalRegionBusy { .. } => StatusCode::InvalidArguments,

-            ForbiddenPhysicalAlter { .. } => StatusCode::Unsupported,
+            ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => {
+                StatusCode::Unsupported
+            }

             MissingInternalColumn { .. }
             | DeserializeSemanticType { .. }
             | DeserializeColumnMetadata { .. }
+            | SerializeColumnMetadata { .. }
             | DecodeColumnValue { .. }
             | ParseRegionId { .. }
             | InvalidMetadata { .. } => StatusCode::Unexpected,

@@ -167,7 +167,7 @@ impl MetadataRegion {

     // TODO(ruihang): avoid using `get_all`
     /// Get all the columns of a given logical region.
-    /// Return a list of (column_name, semantic_type).
+    /// Return a list of (column_name, column_metadata).
     pub async fn logical_columns(
         &self,
         physical_region_id: RegionId,
@@ -56,8 +56,9 @@ pin-project.workspace = true
 prometheus.workspace = true
 prost.workspace = true
 puffin.workspace = true
+rand.workspace = true
 regex = "1.5"
-serde = { version = "1.0", features = ["derive"] }
+serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
 smallvec.workspace = true
@@ -75,7 +76,6 @@ common-procedure-test.workspace = true
 common-test-util.workspace = true
 criterion = "0.4"
 log-store.workspace = true
-rand.workspace = true
 toml.workspace = true

 [[bench]]
@@ -21,7 +21,7 @@ use datafusion_common::Column;
 use datafusion_expr::{lit, Expr};
 use datatypes::data_type::ConcreteDataType;
 use datatypes::schema::ColumnSchema;
-use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
+use mito2::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtable};
 use mito2::memtable::time_series::TimeSeriesMemtable;
 use mito2::memtable::{KeyValues, Memtable};
 use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
@@ -41,9 +41,9 @@ fn write_rows(c: &mut Criterion) {

     // Note that this test only generates one time series.
     let mut group = c.benchmark_group("write");
-    group.bench_function("merge_tree", |b| {
+    group.bench_function("partition_tree", |b| {
         let memtable =
-            MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
+            PartitionTreeMemtable::new(1, metadata.clone(), None, &PartitionTreeConfig::default());
         let kvs =
             memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
         b.iter(|| {
@@ -51,7 +51,7 @@ fn write_rows(c: &mut Criterion) {
         });
     });
     group.bench_function("time_series", |b| {
-        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
+        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true);
         let kvs =
             memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
         b.iter(|| {
@@ -63,14 +63,14 @@ fn write_rows(c: &mut Criterion) {
 /// Scans all rows.
 fn full_scan(c: &mut Criterion) {
     let metadata = Arc::new(cpu_metadata());
-    let config = MergeTreeConfig::default();
+    let config = PartitionTreeConfig::default();
     let start_sec = 1710043200;
     let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);

     let mut group = c.benchmark_group("full_scan");
     group.sample_size(10);
-    group.bench_function("merge_tree", |b| {
-        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
+    group.bench_function("partition_tree", |b| {
+        let memtable = PartitionTreeMemtable::new(1, metadata.clone(), None, &config);
         for kvs in generator.iter() {
             memtable.write(&kvs).unwrap();
         }
@@ -83,7 +83,7 @@ fn full_scan(c: &mut Criterion) {
         });
     });
     group.bench_function("time_series", |b| {
-        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
+        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true);
         for kvs in generator.iter() {
             memtable.write(&kvs).unwrap();
         }
@@ -100,14 +100,14 @@ fn full_scan(c: &mut Criterion) {
 /// Filters 1 host.
 fn filter_1_host(c: &mut Criterion) {
     let metadata = Arc::new(cpu_metadata());
-    let config = MergeTreeConfig::default();
+    let config = PartitionTreeConfig::default();
     let start_sec = 1710043200;
     let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);

     let mut group = c.benchmark_group("filter_1_host");
     group.sample_size(10);
-    group.bench_function("merge_tree", |b| {
-        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
+    group.bench_function("partition_tree", |b| {
+        let memtable = PartitionTreeMemtable::new(1, metadata.clone(), None, &config);
         for kvs in generator.iter() {
             memtable.write(&kvs).unwrap();
         }
@@ -121,7 +121,7 @@ fn filter_1_host(c: &mut Criterion) {
         });
     });
     group.bench_function("time_series", |b| {
-        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
+        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true);
         for kvs in generator.iter() {
             memtable.write(&kvs).unwrap();
         }
@@ -328,14 +328,14 @@ mod tests {
     fn test_deserialize_config() {
         let s = r#"
 [memtable]
-type = "experimental"
+type = "partition_tree"
 index_max_keys_per_shard = 8192
 data_freeze_threshold = 1024
 dedup = true
 fork_dictionary_bytes = "512MiB"
 "#;
         let config: MitoConfig = toml::from_str(s).unwrap();
-        let MemtableConfig::Experimental(config) = &config.memtable else {
+        let MemtableConfig::PartitionTree(config) = &config.memtable else {
             unreachable!()
         };
         assert_eq!(1024, config.data_freeze_threshold);
@@ -57,7 +57,7 @@ use object_store::manager::ObjectStoreManagerRef;
 use snafu::{ensure, OptionExt, ResultExt};
 use store_api::logstore::LogStore;
 use store_api::metadata::RegionMetadataRef;
-use store_api::region_engine::{RegionEngine, RegionRole, SetReadonlyResponse};
+use store_api::region_engine::{RegionEngine, RegionHandleResult, RegionRole, SetReadonlyResponse};
 use store_api::region_request::{AffectedRows, RegionRequest};
 use store_api::storage::{RegionId, ScanRequest};
 use tokio::sync::oneshot;
@@ -290,10 +290,11 @@ impl RegionEngine for MitoEngine {
         &self,
         region_id: RegionId,
         request: RegionRequest,
-    ) -> Result<AffectedRows, BoxedError> {
+    ) -> Result<RegionHandleResult, BoxedError> {
         self.inner
             .handle_request(region_id, request)
             .await
+            .map(RegionHandleResult::new)
             .map_err(BoxedError::new)
     }

@@ -373,6 +374,7 @@ impl MitoEngine {
         object_store_manager: ObjectStoreManagerRef,
         write_buffer_manager: Option<crate::flush::WriteBufferManagerRef>,
         listener: Option<crate::engine::listener::EventListenerRef>,
+        time_provider: crate::time_provider::TimeProviderRef,
     ) -> Result<MitoEngine> {
         config.sanitize(data_home)?;

@@ -385,6 +387,7 @@ impl MitoEngine {
             object_store_manager,
             write_buffer_manager,
             listener,
+            time_provider,
         )
         .await?,
         config,
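
For orientation, the shape of the new return type as inferred from its uses in this diff (`RegionHandleResult::new`, `.affected_rows`, `.extension`); the authoritative definition lives in `store_api::region_engine`:

use std::collections::HashMap;

pub struct RegionHandleResult {
    pub affected_rows: usize,
    pub extension: HashMap<String, Vec<u8>>,
}

impl RegionHandleResult {
    /// Wraps a plain affected-rows count with an empty extension map,
    /// which is what engines without extra return data (like mito) use.
    pub fn new(affected_rows: usize) -> Self {
        Self {
            affected_rows,
            extension: HashMap::new(),
        }
    }
}
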
@@ -111,7 +111,7 @@ async fn test_region_replay() {

     let engine = env.reopen_engine(engine, MitoConfig::default()).await;

-    let rows = engine
+    let result = engine
         .handle_request(
             region_id,
             RegionRequest::Open(RegionOpenRequest {
@@ -123,7 +123,7 @@ async fn test_region_replay() {
         )
         .await
         .unwrap();
-    assert_eq!(0, rows);
+    assert_eq!(0, result.affected_rows);

     let request = ScanRequest::default();
     let stream = engine.handle_query(region_id, request).await.unwrap();

@@ -42,7 +42,7 @@ async fn put_and_flush(
     };
     put_rows(engine, region_id, rows).await;

-    let rows = engine
+    let result = engine
         .handle_request(
             region_id,
             RegionRequest::Flush(RegionFlushRequest {
@@ -51,7 +51,7 @@ async fn put_and_flush(
         )
         .await
         .unwrap();
-    assert_eq!(0, rows);
+    assert_eq!(0, result.affected_rows);
 }

 async fn delete_and_flush(
@@ -66,16 +66,16 @@ async fn delete_and_flush(
         rows: build_rows_for_key("a", rows.start, rows.end, 0),
     };

-    let rows_affected = engine
+    let result = engine
         .handle_request(
             region_id,
             RegionRequest::Delete(RegionDeleteRequest { rows }),
         )
         .await
         .unwrap();
-    assert_eq!(row_cnt, rows_affected);
+    assert_eq!(row_cnt, result.affected_rows);

-    let rows = engine
+    let result = engine
         .handle_request(
             region_id,
             RegionRequest::Flush(RegionFlushRequest {
@@ -84,7 +84,7 @@ async fn delete_and_flush(
         )
         .await
         .unwrap();
-    assert_eq!(0, rows);
+    assert_eq!(0, result.affected_rows);
 }

 async fn collect_stream_ts(stream: SendableRecordBatchStream) -> Vec<i64> {
@@ -127,11 +127,11 @@ async fn test_compaction_region() {
     delete_and_flush(&engine, region_id, &column_schemas, 15..30).await;
     put_and_flush(&engine, region_id, &column_schemas, 15..25).await;

-    let output = engine
+    let result = engine
         .handle_request(region_id, RegionRequest::Compact(RegionCompactRequest {}))
         .await
         .unwrap();
-    assert_eq!(output, 0);
+    assert_eq!(result.affected_rows, 0);

     let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
     assert_eq!(
@@ -14,12 +14,15 @@

 use std::time::Duration;

+use api::v1::Rows;
+use common_recordbatch::RecordBatches;
 use store_api::region_engine::RegionEngine;
 use store_api::region_request::{RegionCloseRequest, RegionRequest};
-use store_api::storage::RegionId;
+use store_api::storage::{RegionId, ScanRequest};

 use crate::config::MitoConfig;
-use crate::test_util::{CreateRequestBuilder, TestEnv};
+use crate::region::options::MemtableOptions;
+use crate::test_util::{build_rows, put_rows, rows_schema, CreateRequestBuilder, TestEnv};

 #[tokio::test]
 async fn test_engine_create_new_region() {
@@ -198,3 +201,45 @@ async fn test_engine_create_with_custom_store() {
         .await
         .unwrap());
 }
+
+#[tokio::test]
+async fn test_engine_create_with_memtable_opts() {
+    let mut env = TestEnv::new();
+    let engine = env.create_engine(MitoConfig::default()).await;
+
+    let region_id = RegionId::new(1, 1);
+    let request = CreateRequestBuilder::new()
+        .insert_option("memtable.type", "partition_tree")
+        .insert_option("memtable.partition_tree.index_max_keys_per_shard", "2")
+        .build();
+    let column_schemas = rows_schema(&request);
+    engine
+        .handle_request(region_id, RegionRequest::Create(request))
+        .await
+        .unwrap();
+    let region = engine.get_region(region_id).unwrap();
+    let Some(MemtableOptions::PartitionTree(memtable_opts)) = &region.version().options.memtable
+    else {
+        unreachable!();
+    };
+    assert_eq!(2, memtable_opts.index_max_keys_per_shard);
+
+    let rows = Rows {
+        schema: column_schemas,
+        rows: build_rows(0, 3),
+    };
+    put_rows(&engine, region_id, rows).await;
+
+    let request = ScanRequest::default();
+    let stream = engine.handle_query(region_id, request).await.unwrap();
+    let batches = RecordBatches::try_collect(stream).await.unwrap();
+    let expected = "\
++-------+---------+---------------------+
+| tag_0 | field_0 | ts                  |
++-------+---------+---------------------+
+| 0     | 0.0     | 1970-01-01T00:00:00 |
+| 1     | 1.0     | 1970-01-01T00:00:01 |
+| 2     | 2.0     | 1970-01-01T00:00:02 |
++-------+---------+---------------------+";
+    assert_eq!(expected, batches.pretty_print().unwrap());
+}
@@ -14,10 +14,13 @@

 //! Flush tests for mito engine.

+use std::sync::atomic::{AtomicI64, Ordering};
 use std::sync::Arc;
+use std::time::Duration;

 use api::v1::Rows;
 use common_recordbatch::RecordBatches;
+use common_time::util::current_time_millis;
 use store_api::region_engine::RegionEngine;
 use store_api::region_request::RegionRequest;
 use store_api::storage::{RegionId, ScanRequest};
@@ -28,6 +31,8 @@ use crate::test_util::{
     build_rows, build_rows_for_key, flush_region, put_rows, reopen_region, rows_schema,
     CreateRequestBuilder, MockWriteBufferManager, TestEnv,
 };
+use crate::time_provider::TimeProvider;
+use crate::worker::MAX_INITIAL_CHECK_DELAY_SECS;

 #[tokio::test]
 async fn test_manual_flush() {
@@ -272,3 +277,101 @@ async fn test_flush_reopen_region() {
     assert_eq!(2, version_data.last_entry_id);
     assert_eq!(5, version_data.committed_sequence);
 }
+
+#[derive(Debug)]
+struct MockTimeProvider {
+    now: AtomicI64,
+    elapsed: AtomicI64,
+}
+
+impl TimeProvider for MockTimeProvider {
+    fn current_time_millis(&self) -> i64 {
+        self.now.load(Ordering::Relaxed)
+    }
+
+    fn elapsed_since(&self, _current_millis: i64) -> i64 {
+        self.elapsed.load(Ordering::Relaxed)
+    }
+
+    fn wait_duration(&self, _duration: Duration) -> Duration {
+        Duration::from_millis(20)
+    }
+}
+
+impl MockTimeProvider {
+    fn new(now: i64) -> Self {
+        Self {
+            now: AtomicI64::new(now),
+            elapsed: AtomicI64::new(0),
+        }
+    }
+
+    fn set_now(&self, now: i64) {
+        self.now.store(now, Ordering::Relaxed);
+    }
+
+    fn set_elapsed(&self, elapsed: i64) {
+        self.elapsed.store(elapsed, Ordering::Relaxed);
+    }
+}
+
+#[tokio::test]
+async fn test_auto_flush_engine() {
+    let mut env = TestEnv::new();
+    let write_buffer_manager = Arc::new(MockWriteBufferManager::default());
+    let listener = Arc::new(FlushListener::default());
+    let now = current_time_millis();
+    let time_provider = Arc::new(MockTimeProvider::new(now));
+    let engine = env
+        .create_engine_with_time(
+            MitoConfig {
+                auto_flush_interval: Duration::from_secs(60 * 5),
+                ..Default::default()
+            },
+            Some(write_buffer_manager.clone()),
+            Some(listener.clone()),
+            time_provider.clone(),
+        )
+        .await;
+
+    let region_id = RegionId::new(1, 1);
+    let request = CreateRequestBuilder::new().build();
+
+    let column_schemas = rows_schema(&request);
+    engine
+        .handle_request(region_id, RegionRequest::Create(request))
+        .await
+        .unwrap();
+
+    // Prepares rows for flush.
+    let rows = Rows {
+        schema: column_schemas.clone(),
+        rows: build_rows_for_key("a", 0, 2, 0),
+    };
+    put_rows(&engine, region_id, rows).await;
+
+    // Sets current time to now + auto_flush_interval * 2.
+    time_provider.set_now(now + (60 * 5 * 2) * 1000);
+    // Sets elapsed time to MAX_INITIAL_CHECK_DELAY_SECS + 1.
+    time_provider.set_elapsed((MAX_INITIAL_CHECK_DELAY_SECS as i64 + 1) * 1000);
+
+    // Wait until flush is finished.
+    tokio::time::timeout(Duration::from_secs(3), listener.wait())
+        .await
+        .unwrap();
+
+    let request = ScanRequest::default();
+    let scanner = engine.scanner(region_id, request).unwrap();
+    assert_eq!(0, scanner.num_memtables());
+    assert_eq!(1, scanner.num_files());
+    let stream = scanner.scan().await.unwrap();
+    let batches = RecordBatches::try_collect(stream).await.unwrap();
+    let expected = "\
++-------+---------+---------------------+
+| tag_0 | field_0 | ts                  |
++-------+---------+---------------------+
+| a     | 0.0     | 1970-01-01T00:00:00 |
+| a     | 1.0     | 1970-01-01T00:00:01 |
++-------+---------+---------------------+";
+    assert_eq!(expected, batches.pretty_print().unwrap());
+}
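
The mock above implements the trait surface reconstructed below (the actual definition sits in mito2's new time_provider module and may differ in bounds or defaults); injecting it makes the auto-flush schedule fully deterministic in tests:

use std::time::Duration;

pub trait TimeProvider: std::fmt::Debug + Send + Sync {
    /// Wall-clock now, in milliseconds.
    fn current_time_millis(&self) -> i64;
    /// Milliseconds elapsed since `current_millis`.
    fn elapsed_since(&self, current_millis: i64) -> i64;
    /// How long the worker should actually sleep for a requested duration.
    fn wait_duration(&self, duration: Duration) -> Duration;
}
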
@@ -572,6 +572,9 @@ pub enum Error {
         #[snafu(source)]
         error: parquet::errors::ParquetError,
     },
+
+    #[snafu(display("Invalid region options, {}", reason))]
+    InvalidRegionOptions { reason: String, location: Location },
 }

 pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -621,7 +624,8 @@ impl ErrorExt for Error {
             | FillDefault { .. }
             | ConvertColumnDataType { .. }
             | ColumnNotFound { .. }
-            | InvalidMetadata { .. } => StatusCode::InvalidArguments,
+            | InvalidMetadata { .. }
+            | InvalidRegionOptions { .. } => StatusCode::InvalidArguments,

             InvalidRegionRequestSchemaVersion { .. } => StatusCode::RequestOutdated,
@@ -176,6 +176,8 @@ pub enum FlushReason {
     Manual,
     /// Flush to alter table.
     Alter,
+    /// Flush periodically.
+    Periodically,
 }

 impl FlushReason {
@@ -432,18 +434,19 @@ impl FlushScheduler {
     ) -> Result<()> {
         debug_assert_eq!(region_id, task.region_id);

-        FLUSH_REQUESTS_TOTAL
-            .with_label_values(&[task.reason.as_str()])
-            .inc();
-
         let version = version_control.current().version;
-        if version.memtables.mutable.is_empty() && version.memtables.immutables().is_empty() {
+        if version.memtables.is_empty() {
             debug_assert!(!self.region_status.contains_key(&region_id));
             // The region has nothing to flush.
             task.on_success();
             return Ok(());
         }

+        // Don't increase the counter if a region has nothing to flush.
+        FLUSH_REQUESTS_TOTAL
+            .with_label_values(&[task.reason.as_str()])
+            .inc();
+
         // Add this region to status map.
         let flush_status = self
             .region_status
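
The early return now hinges on a single `is_empty` check over the memtable set. Given the compound condition it replaces, a natural implementation is the one sketched here (an assumption; the real method lives on the memtable version struct):

// Assumed helper; `mutable` and `immutables()` mirror the old compound check.
impl MemtableVersionSketch {
    /// True when neither the mutable memtable nor any immutable memtable
    /// holds data, i.e. there is nothing to flush.
    fn is_empty(&self) -> bool {
        self.mutable.is_empty() && self.immutables().is_empty()
    }
}
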
@@ -40,6 +40,7 @@ pub mod request;
|
||||
pub mod row_converter;
|
||||
pub(crate) mod schedule;
|
||||
pub mod sst;
|
||||
mod time_provider;
|
||||
pub mod wal;
|
||||
mod worker;
|
||||
|
||||
|
||||
@@ -28,12 +28,14 @@ use crate::error::Result;
|
||||
use crate::flush::WriteBufferManagerRef;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
pub use crate::memtable::key_values::KeyValues;
|
||||
use crate::memtable::merge_tree::MergeTreeConfig;
|
||||
use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder};
|
||||
use crate::memtable::time_series::TimeSeriesMemtableBuilder;
|
||||
use crate::metrics::WRITE_BUFFER_BYTES;
|
||||
use crate::read::Batch;
|
||||
use crate::region::options::MemtableOptions;
|
||||
|
||||
pub mod key_values;
|
||||
pub mod merge_tree;
|
||||
pub mod partition_tree;
|
||||
pub mod time_partition;
|
||||
pub mod time_series;
|
||||
pub(crate) mod version;
|
||||
@@ -47,13 +49,13 @@ pub type MemtableId = u32;
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum MemtableConfig {
-    Experimental(MergeTreeConfig),
+    PartitionTree(PartitionTreeConfig),
     TimeSeries,
 }
 
 impl Default for MemtableConfig {
     fn default() -> Self {
-        Self::Experimental(MergeTreeConfig::default())
+        Self::TimeSeries
     }
 }
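
With the internally tagged representation above, serde picks the variant from the `type` field, and `rename_all = "snake_case"` maps `PartitionTree` to "partition_tree" and `TimeSeries` to "time_series". A minimal round-trip sketch, assuming the same serde attributes and the `toml` crate that the tests below already use:

let config: MemtableConfig = toml::from_str("type = \"time_series\"").unwrap();
assert_eq!(MemtableConfig::TimeSeries, config);
// The default also moves from the experimental merge tree to TimeSeries.
assert_eq!(MemtableConfig::default(), config);
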
@@ -206,6 +208,48 @@ impl Drop for AllocTracker {
     }
 }
 
+/// Provider of memtable builders for regions.
+#[derive(Clone)]
+pub(crate) struct MemtableBuilderProvider {
+    write_buffer_manager: Option<WriteBufferManagerRef>,
+    default_memtable_builder: MemtableBuilderRef,
+}
+
+impl MemtableBuilderProvider {
+    pub(crate) fn new(
+        write_buffer_manager: Option<WriteBufferManagerRef>,
+        default_memtable_builder: MemtableBuilderRef,
+    ) -> Self {
+        Self {
+            write_buffer_manager,
+            default_memtable_builder,
+        }
+    }
+
+    pub(crate) fn builder_for_options(
+        &self,
+        options: Option<&MemtableOptions>,
+    ) -> MemtableBuilderRef {
+        match options {
+            Some(MemtableOptions::TimeSeries) => Arc::new(TimeSeriesMemtableBuilder::new(
+                self.write_buffer_manager.clone(),
+            )),
+            Some(MemtableOptions::PartitionTree(opts)) => {
+                Arc::new(PartitionTreeMemtableBuilder::new(
+                    PartitionTreeConfig {
+                        index_max_keys_per_shard: opts.index_max_keys_per_shard,
+                        data_freeze_threshold: opts.data_freeze_threshold,
+                        fork_dictionary_bytes: opts.fork_dictionary_bytes,
+                        ..Default::default()
+                    },
+                    self.write_buffer_manager.clone(),
+                ))
+            }
+            None => self.default_memtable_builder.clone(),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use common_base::readable_size::ReadableSize;
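
The new MemtableBuilderProvider lets each region pick its memtable implementation through its own `memtable` option while falling back to the engine-wide default builder. A hypothetical call site (`default_builder`, `region_options`, `next_memtable_id`, and `region_metadata` are illustrative names, not the actual worker code):

let provider = MemtableBuilderProvider::new(None, default_builder);
let builder = provider.builder_for_options(region_options.memtable.as_ref());
let memtable = builder.build(next_memtable_id, &region_metadata);
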
@@ -216,20 +260,20 @@ mod tests {
     #[test]
     fn test_deserialize_memtable_config() {
         let s = r#"
-type = "experimental"
+type = "partition_tree"
 index_max_keys_per_shard = 8192
 data_freeze_threshold = 1024
 dedup = true
 fork_dictionary_bytes = "512MiB"
 "#;
         let config: MemtableConfig = toml::from_str(s).unwrap();
-        let MemtableConfig::Experimental(merge_tree) = config else {
+        let MemtableConfig::PartitionTree(memtable_config) = config else {
             unreachable!()
         };
-        assert!(merge_tree.dedup);
-        assert_eq!(8192, merge_tree.index_max_keys_per_shard);
-        assert_eq!(1024, merge_tree.data_freeze_threshold);
-        assert_eq!(ReadableSize::mb(512), merge_tree.fork_dictionary_bytes);
+        assert!(memtable_config.dedup);
+        assert_eq!(8192, memtable_config.index_max_keys_per_shard);
+        assert_eq!(1024, memtable_config.data_freeze_threshold);
+        assert_eq!(ReadableSize::mb(512), memtable_config.fork_dictionary_bytes);
     }
 
     #[test]
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Memtable implementation based on a merge tree.
+//! Memtable implementation based on a partition tree.
 
 pub(crate) mod data;
 mod dedup;
@@ -37,15 +37,17 @@ use table::predicate::Predicate;
 use crate::error::Result;
 use crate::flush::WriteBufferManagerRef;
 use crate::memtable::key_values::KeyValue;
-use crate::memtable::merge_tree::metrics::WriteMetrics;
-use crate::memtable::merge_tree::tree::MergeTree;
+use crate::memtable::partition_tree::metrics::WriteMetrics;
+use crate::memtable::partition_tree::tree::PartitionTree;
 use crate::memtable::{
     AllocTracker, BoxedBatchIterator, KeyValues, Memtable, MemtableBuilder, MemtableId,
     MemtableRef, MemtableStats,
 };
 
 /// Use `1/DICTIONARY_SIZE_FACTOR` of OS memory as dictionary size.
-const DICTIONARY_SIZE_FACTOR: u64 = 8;
+pub(crate) const DICTIONARY_SIZE_FACTOR: u64 = 8;
+pub(crate) const DEFAULT_MAX_KEYS_PER_SHARD: usize = 8192;
+pub(crate) const DEFAULT_FREEZE_THRESHOLD: usize = 131072;
 
 /// Id of a shard, only unique inside a partition.
 type ShardId = u32;
@@ -59,23 +61,30 @@ struct PkId {
     pk_index: PkIndex,
 }
 
-/// Config for the merge tree memtable.
+// TODO(yingwen): `fork_dictionary_bytes` is per region option, if we have multiple partition tree
+// memtable then we will use a lot memory. We should find a better way to control the
+// dictionary size.
+/// Config for the partition tree memtable.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(default)]
-pub struct MergeTreeConfig {
+pub struct PartitionTreeConfig {
     /// Max keys in an index shard.
     pub index_max_keys_per_shard: usize,
     /// Number of rows to freeze a data part.
     pub data_freeze_threshold: usize,
     /// Whether to delete duplicates rows.
+    ///
+    /// Skips deserializing as it should be determined by whether the
+    /// table is append only.
+    #[serde(skip_deserializing)]
     pub dedup: bool,
     /// Total bytes of dictionary to keep in fork.
     pub fork_dictionary_bytes: ReadableSize,
 }
 
-impl Default for MergeTreeConfig {
+impl Default for PartitionTreeConfig {
     fn default() -> Self {
-        let mut fork_dictionary_bytes = ReadableSize::gb(1);
+        let mut fork_dictionary_bytes = ReadableSize::mb(512);
         if let Some(sys_memory) = common_config::utils::get_sys_total_memory() {
             let adjust_dictionary_bytes =
                 std::cmp::min(sys_memory / DICTIONARY_SIZE_FACTOR, fork_dictionary_bytes);
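
Worked through, the default dictionary budget above becomes min(total RAM / 8, 512 MiB) instead of min(total RAM / 8, 1 GiB): on a 16 GiB host, 16 GiB / DICTIONARY_SIZE_FACTOR = 2 GiB, so the 512 MiB cap wins; on a 2 GiB host, 2 GiB / 8 = 256 MiB replaces the cap. The same computation as a standalone sketch, with plain u64 byte counts standing in for ReadableSize and `sys_memory_bytes` for common_config::utils::get_sys_total_memory():

fn default_fork_dictionary_bytes(sys_memory_bytes: Option<u64>) -> u64 {
    const DICTIONARY_SIZE_FACTOR: u64 = 8;
    let cap = 512 * 1024 * 1024; // 512 MiB
    match sys_memory_bytes {
        // Never keep more than 1/8 of total memory for fork dictionaries.
        Some(sys) => std::cmp::min(sys / DICTIONARY_SIZE_FACTOR, cap),
        None => cap,
    }
}
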
@@ -93,24 +102,24 @@ impl Default for MergeTreeConfig {
     }
 }
 
-/// Memtable based on a merge tree.
-pub struct MergeTreeMemtable {
+/// Memtable based on a partition tree.
+pub struct PartitionTreeMemtable {
     id: MemtableId,
-    tree: MergeTree,
+    tree: PartitionTree,
     alloc_tracker: AllocTracker,
     max_timestamp: AtomicI64,
     min_timestamp: AtomicI64,
 }
 
-impl fmt::Debug for MergeTreeMemtable {
+impl fmt::Debug for PartitionTreeMemtable {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("MergeTreeMemtable")
+        f.debug_struct("PartitionTreeMemtable")
             .field("id", &self.id)
             .finish()
     }
 }
 
-impl Memtable for MergeTreeMemtable {
+impl Memtable for PartitionTreeMemtable {
     fn id(&self) -> MemtableId {
         self.id
     }
@@ -188,29 +197,29 @@ impl Memtable for MergeTreeMemtable {
     fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
         let tree = self.tree.fork(metadata.clone());
 
-        let memtable = MergeTreeMemtable::with_tree(id, tree);
+        let memtable = PartitionTreeMemtable::with_tree(id, tree);
         Arc::new(memtable)
     }
 }
 
-impl MergeTreeMemtable {
+impl PartitionTreeMemtable {
     /// Returns a new memtable.
     pub fn new(
         id: MemtableId,
         metadata: RegionMetadataRef,
         write_buffer_manager: Option<WriteBufferManagerRef>,
-        config: &MergeTreeConfig,
+        config: &PartitionTreeConfig,
     ) -> Self {
         Self::with_tree(
             id,
-            MergeTree::new(metadata, config, write_buffer_manager.clone()),
+            PartitionTree::new(metadata, config, write_buffer_manager.clone()),
         )
     }
 
     /// Creates a mutable memtable from the tree.
     ///
     /// It also adds the bytes used by shared parts (e.g. index) to the memory usage.
-    fn with_tree(id: MemtableId, tree: MergeTree) -> Self {
+    fn with_tree(id: MemtableId, tree: PartitionTree) -> Self {
         let alloc_tracker = AllocTracker::new(tree.write_buffer_manager());
 
         Self {
@@ -269,17 +278,17 @@ impl MergeTreeMemtable {
     }
 }
 
-/// Builder to build a [MergeTreeMemtable].
+/// Builder to build a [PartitionTreeMemtable].
 #[derive(Debug, Default)]
-pub struct MergeTreeMemtableBuilder {
-    config: MergeTreeConfig,
+pub struct PartitionTreeMemtableBuilder {
+    config: PartitionTreeConfig,
     write_buffer_manager: Option<WriteBufferManagerRef>,
 }
 
-impl MergeTreeMemtableBuilder {
+impl PartitionTreeMemtableBuilder {
     /// Creates a new builder with specific `write_buffer_manager`.
     pub fn new(
-        config: MergeTreeConfig,
+        config: PartitionTreeConfig,
         write_buffer_manager: Option<WriteBufferManagerRef>,
     ) -> Self {
         Self {
@@ -289,9 +298,9 @@ impl MergeTreeMemtableBuilder {
     }
 }
 
-impl MemtableBuilder for MergeTreeMemtableBuilder {
+impl MemtableBuilder for PartitionTreeMemtableBuilder {
     fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
-        Arc::new(MergeTreeMemtable::new(
+        Arc::new(PartitionTreeMemtable::new(
             id,
             metadata.clone(),
             self.write_buffer_manager.clone(),
@@ -326,7 +335,8 @@ mod tests {
         let timestamps = (0..100).collect::<Vec<_>>();
         let kvs =
             memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
-        let memtable = MergeTreeMemtable::new(1, metadata, None, &MergeTreeConfig::default());
+        let memtable =
+            PartitionTreeMemtable::new(1, metadata, None, &PartitionTreeConfig::default());
         memtable.write(&kvs).unwrap();
 
         let expected_ts = kvs
@@ -362,7 +372,7 @@
             memtable_util::metadata_with_primary_key(vec![], false)
         };
         let memtable =
-            MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
+            PartitionTreeMemtable::new(1, metadata.clone(), None, &PartitionTreeConfig::default());
 
         let kvs = memtable_util::build_key_values(
             &metadata,
@@ -421,8 +431,8 @@ mod tests {
             memtable_util::metadata_with_primary_key(vec![], false)
         };
         // Try to build a memtable via the builder.
-        let memtable =
-            MergeTreeMemtableBuilder::new(MergeTreeConfig::default(), None).build(1, &metadata);
+        let memtable = PartitionTreeMemtableBuilder::new(PartitionTreeConfig::default(), None)
+            .build(1, &metadata);
 
         let expect = (0..100).collect::<Vec<_>>();
         let kvs = memtable_util::build_key_values(&metadata, "hello".to_string(), 10, &expect, 1);
@@ -457,11 +467,11 @@
 
     fn write_iter_multi_keys(max_keys: usize, freeze_threshold: usize) {
         let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
-        let memtable = MergeTreeMemtable::new(
+        let memtable = PartitionTreeMemtable::new(
             1,
             metadata.clone(),
             None,
-            &MergeTreeConfig {
+            &PartitionTreeConfig {
                 index_max_keys_per_shard: max_keys,
                 data_freeze_threshold: freeze_threshold,
                 ..Default::default()
@@ -506,8 +516,8 @@ mod tests {
     fn test_memtable_filter() {
         let metadata = memtable_util::metadata_with_primary_key(vec![0, 1], false);
         // Try to build a memtable via the builder.
-        let memtable = MergeTreeMemtableBuilder::new(
-            MergeTreeConfig {
+        let memtable = PartitionTreeMemtableBuilder::new(
+            PartitionTreeConfig {
                 index_max_keys_per_shard: 40,
                 ..Default::default()
             },
@@ -539,4 +549,17 @@ mod tests {
             assert_eq!(timestamps, read);
         }
     }
+
+    #[test]
+    fn test_deserialize_config() {
+        let config = PartitionTreeConfig {
+            dedup: false,
+            ..Default::default()
+        };
+        // Creates a json with dedup = false.
+        let json = serde_json::to_string(&config).unwrap();
+        let config: PartitionTreeConfig = serde_json::from_str(&json).unwrap();
+        assert!(config.dedup);
+        assert_eq!(PartitionTreeConfig::default(), config);
+    }
 }
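
The round trip above passes because `#[serde(skip_deserializing)]` on `dedup` means the flag is written out but ignored when read back, falling back to the struct-level default (true); a stored `dedup = false` therefore cannot override what the engine derives from the table's append-only mode. A self-contained sketch of the same pattern with a hypothetical struct:

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(default)]
struct Options {
    // Serialized for visibility, but never read back in.
    #[serde(skip_deserializing)]
    dedup: bool,
}

impl Default for Options {
    fn default() -> Self {
        Self { dedup: true }
    }
}

fn main() {
    let json = serde_json::to_string(&Options { dedup: false }).unwrap();
    assert_eq!(json, r#"{"dedup":false}"#);
    let back: Options = serde_json::from_str(&json).unwrap();
    assert!(back.dedup); // the serialized `false` was ignored
}
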
@@ -45,9 +45,11 @@ use store_api::storage::consts::{OP_TYPE_COLUMN_NAME, SEQUENCE_COLUMN_NAME};
 use crate::error;
 use crate::error::Result;
 use crate::memtable::key_values::KeyValue;
-use crate::memtable::merge_tree::merger::{DataBatchKey, DataNode, DataSource, Merger};
-use crate::memtable::merge_tree::PkIndex;
-use crate::metrics::{MERGE_TREE_DATA_BUFFER_FREEZE_STAGE_ELAPSED, MERGE_TREE_READ_STAGE_ELAPSED};
+use crate::memtable::partition_tree::merger::{DataBatchKey, DataNode, DataSource, Merger};
+use crate::memtable::partition_tree::PkIndex;
+use crate::metrics::{
+    PARTITION_TREE_DATA_BUFFER_FREEZE_STAGE_ELAPSED, PARTITION_TREE_READ_STAGE_ELAPSED,
+};
 
 const PK_INDEX_COLUMN_NAME: &str = "__pk_index";
@@ -255,7 +257,7 @@ impl DataBuffer {
 
     /// Builds a lazily initialized data buffer reader from [DataBuffer]
     pub fn read(&self) -> Result<DataBufferReaderBuilder> {
-        let _timer = MERGE_TREE_READ_STAGE_ELAPSED
+        let _timer = PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["read_data_buffer"])
             .start_timer();
@@ -523,7 +525,7 @@ pub(crate) struct DataBufferReader {
 
 impl Drop for DataBufferReader {
     fn drop(&mut self) {
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["read_data_buffer"])
             .observe(self.elapsed_time.as_secs_f64())
     }
@@ -780,7 +782,7 @@ impl<'a> DataPartEncoder<'a> {
         let mut bytes = Vec::with_capacity(1024);
 
         let rb = {
-            let _timer = MERGE_TREE_DATA_BUFFER_FREEZE_STAGE_ELAPSED
+            let _timer = PARTITION_TREE_DATA_BUFFER_FREEZE_STAGE_ELAPSED
                 .with_label_values(&["drain_data_buffer_to_batch"])
                 .start_timer();
             drain_data_buffer_to_record_batches(
@@ -793,7 +795,7 @@ impl<'a> DataPartEncoder<'a> {
         };
 
         {
-            let _timer = MERGE_TREE_DATA_BUFFER_FREEZE_STAGE_ELAPSED
+            let _timer = PARTITION_TREE_DATA_BUFFER_FREEZE_STAGE_ELAPSED
                 .with_label_values(&["encode"])
                 .start_timer();
             let mut writer =
@@ -837,7 +839,7 @@ pub struct DataPartReader {
 
 impl Drop for DataPartReader {
     fn drop(&mut self) {
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["read_data_part"])
             .observe(self.elapsed.as_secs_f64());
     }
@@ -973,7 +975,7 @@ impl DataParts {
     /// The returned iterator yields a record batch of one primary key at a time.
     /// The order of yielding primary keys is determined by provided weights.
     pub fn read(&self) -> Result<DataPartsReaderBuilder> {
-        let _timer = MERGE_TREE_READ_STAGE_ELAPSED
+        let _timer = PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["build_data_parts_reader"])
             .start_timer();
@@ -1030,7 +1032,7 @@ pub struct DataPartsReader {
 
 impl Drop for DataPartsReader {
     fn drop(&mut self) {
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["read_data_parts"])
             .observe(self.elapsed.as_secs_f64())
     }
@@ -15,9 +15,9 @@
 use std::ops::Range;
 
 use crate::error::Result;
-use crate::memtable::merge_tree::data::DataBatch;
-use crate::memtable::merge_tree::shard::DataBatchSource;
-use crate::memtable::merge_tree::PkId;
+use crate::memtable::partition_tree::data::DataBatch;
+use crate::memtable::partition_tree::shard::DataBatchSource;
+use crate::memtable::partition_tree::PkId;
 
 /// A reader that dedup sorted batches from a merger.
 pub struct DedupReader<T> {
@@ -112,7 +112,7 @@ mod tests {
     use store_api::metadata::RegionMetadataRef;
 
     use super::*;
-    use crate::memtable::merge_tree::data::{DataBuffer, DataParts, DataPartsReader};
+    use crate::memtable::partition_tree::data::{DataBuffer, DataParts, DataPartsReader};
     use crate::test_util::memtable_util::{
         extract_data_batch, metadata_for_test, write_rows_to_buffer,
     };
@@ -19,8 +19,8 @@ use std::sync::Arc;
 
 use datatypes::arrow::array::{Array, ArrayBuilder, BinaryArray, BinaryBuilder};
 
-use crate::memtable::merge_tree::metrics::WriteMetrics;
-use crate::memtable::merge_tree::PkIndex;
+use crate::memtable::partition_tree::metrics::WriteMetrics;
+use crate::memtable::partition_tree::PkIndex;
 use crate::metrics::MEMTABLE_DICT_BYTES;
 
 /// Maximum keys in a [DictBlock].
@@ -18,8 +18,8 @@ use std::fmt::Debug;
 use std::ops::Range;
 
 use crate::error::Result;
-use crate::memtable::merge_tree::data::{DataBatch, DataBufferReader, DataPartReader};
-use crate::memtable::merge_tree::PkIndex;
+use crate::memtable::partition_tree::data::{DataBatch, DataBufferReader, DataPartReader};
+use crate::memtable::partition_tree::PkIndex;
 
 /// Nodes of merger's heap.
 pub trait Node: Ord {
@@ -297,7 +297,7 @@ mod tests {
     use store_api::metadata::RegionMetadataRef;
 
    use super::*;
-    use crate::memtable::merge_tree::data::{timestamp_array_to_i64_slice, DataBuffer};
+    use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBuffer};
     use crate::test_util::memtable_util::{build_key_values_with_ts_seq_values, metadata_for_test};
 
     fn write_rows_to_buffer(
@@ -14,7 +14,7 @@
 
 //! Internal metrics of the memtable.
 
-/// Metrics of writing the merge tree.
+/// Metrics of writing the partition tree.
 pub struct WriteMetrics {
     /// Size allocated by keys.
     pub key_bytes: usize,
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//! Partition of a merge tree.
+//! Partition of a partition tree.
 //!
 //! We only support partitioning the tree by pre-defined internal columns.
@@ -28,15 +28,15 @@ use store_api::storage::ColumnId;
 
 use crate::error::Result;
 use crate::memtable::key_values::KeyValue;
-use crate::memtable::merge_tree::data::{DataBatch, DataParts, DATA_INIT_CAP};
-use crate::memtable::merge_tree::dedup::DedupReader;
-use crate::memtable::merge_tree::metrics::WriteMetrics;
-use crate::memtable::merge_tree::shard::{
+use crate::memtable::partition_tree::data::{DataBatch, DataParts, DATA_INIT_CAP};
+use crate::memtable::partition_tree::dedup::DedupReader;
+use crate::memtable::partition_tree::metrics::WriteMetrics;
+use crate::memtable::partition_tree::shard::{
     BoxedDataBatchSource, Shard, ShardMerger, ShardNode, ShardSource,
 };
-use crate::memtable::merge_tree::shard_builder::ShardBuilder;
-use crate::memtable::merge_tree::{MergeTreeConfig, PkId};
-use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;
+use crate::memtable::partition_tree::shard_builder::ShardBuilder;
+use crate::memtable::partition_tree::{PartitionTreeConfig, PkId};
+use crate::metrics::PARTITION_TREE_READ_STAGE_ELAPSED;
 use crate::read::{Batch, BatchBuilder};
 use crate::row_converter::{McmpRowCodec, RowCodec};
@@ -54,7 +54,7 @@ pub type PartitionRef = Arc<Partition>;
 
 impl Partition {
     /// Creates a new partition.
-    pub fn new(metadata: RegionMetadataRef, config: &MergeTreeConfig) -> Self {
+    pub fn new(metadata: RegionMetadataRef, config: &PartitionTreeConfig) -> Self {
         Partition {
             inner: RwLock::new(Inner::new(metadata, config)),
             dedup: config.dedup,
@@ -193,7 +193,7 @@ impl Partition {
     /// Forks the partition.
     ///
     /// Must freeze the partition before fork.
-    pub fn fork(&self, metadata: &RegionMetadataRef, config: &MergeTreeConfig) -> Partition {
+    pub fn fork(&self, metadata: &RegionMetadataRef, config: &PartitionTreeConfig) -> Partition {
         let (shards, shard_builder) = {
             let inner = self.inner.read().unwrap();
             debug_assert!(inner.shard_builder.is_empty());
@@ -437,11 +437,11 @@ pub(crate) struct ReadPartitionContext {
 impl Drop for ReadPartitionContext {
     fn drop(&mut self) {
         let partition_read_source = self.metrics.read_source.as_secs_f64();
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["partition_read_source"])
             .observe(partition_read_source);
         let partition_data_batch_to_batch = self.metrics.data_batch_to_batch.as_secs_f64();
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["partition_data_batch_to_batch"])
             .observe(partition_data_batch_to_batch);
@@ -558,7 +558,7 @@ struct Inner {
 }
 
 impl Inner {
-    fn new(metadata: RegionMetadataRef, config: &MergeTreeConfig) -> Self {
+    fn new(metadata: RegionMetadataRef, config: &PartitionTreeConfig) -> Self {
         let (shards, current_shard_id) = if metadata.primary_key.is_empty() {
             let data_parts = DataParts::new(metadata.clone(), DATA_INIT_CAP, config.dedup);
             (
@@ -21,15 +21,15 @@ use store_api::metadata::RegionMetadataRef;
 
 use crate::error::Result;
 use crate::memtable::key_values::KeyValue;
-use crate::memtable::merge_tree::data::{
+use crate::memtable::partition_tree::data::{
     DataBatch, DataParts, DataPartsReader, DataPartsReaderBuilder, DATA_INIT_CAP,
 };
-use crate::memtable::merge_tree::dict::KeyDictRef;
-use crate::memtable::merge_tree::merger::{Merger, Node};
-use crate::memtable::merge_tree::partition::PrimaryKeyFilter;
-use crate::memtable::merge_tree::shard_builder::ShardBuilderReader;
-use crate::memtable::merge_tree::{PkId, PkIndex, ShardId};
-use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;
+use crate::memtable::partition_tree::dict::KeyDictRef;
+use crate::memtable::partition_tree::merger::{Merger, Node};
+use crate::memtable::partition_tree::partition::PrimaryKeyFilter;
+use crate::memtable::partition_tree::shard_builder::ShardBuilderReader;
+use crate::memtable::partition_tree::{PkId, PkIndex, ShardId};
+use crate::metrics::PARTITION_TREE_READ_STAGE_ELAPSED;
 
 /// Shard stores data related to the same key dictionary.
 pub struct Shard {
@@ -257,7 +257,7 @@ impl ShardReader {
 impl Drop for ShardReader {
     fn drop(&mut self) {
         let shard_prune_pk = self.prune_pk_cost.as_secs_f64();
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["shard_prune_pk"])
             .observe(shard_prune_pk);
         if self.keys_before_pruning > 0 {
@@ -427,10 +427,10 @@ mod tests {
     use std::sync::Arc;
 
     use super::*;
-    use crate::memtable::merge_tree::data::timestamp_array_to_i64_slice;
-    use crate::memtable::merge_tree::dict::KeyDictBuilder;
-    use crate::memtable::merge_tree::metrics::WriteMetrics;
-    use crate::memtable::merge_tree::PkIndex;
+    use crate::memtable::partition_tree::data::timestamp_array_to_i64_slice;
+    use crate::memtable::partition_tree::dict::KeyDictBuilder;
+    use crate::memtable::partition_tree::metrics::WriteMetrics;
+    use crate::memtable::partition_tree::PkIndex;
     use crate::memtable::KeyValues;
     use crate::test_util::memtable_util::{
         build_key_values_with_ts_seq_values, encode_keys, metadata_for_test,
@@ -22,15 +22,15 @@ use store_api::metadata::RegionMetadataRef;
 
 use crate::error::Result;
 use crate::memtable::key_values::KeyValue;
-use crate::memtable::merge_tree::data::{
+use crate::memtable::partition_tree::data::{
     DataBatch, DataBuffer, DataBufferReader, DataBufferReaderBuilder, DataParts, DATA_INIT_CAP,
 };
-use crate::memtable::merge_tree::dict::{DictBuilderReader, KeyDictBuilder};
-use crate::memtable::merge_tree::metrics::WriteMetrics;
-use crate::memtable::merge_tree::partition::PrimaryKeyFilter;
-use crate::memtable::merge_tree::shard::Shard;
-use crate::memtable::merge_tree::{MergeTreeConfig, PkId, PkIndex, ShardId};
-use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;
+use crate::memtable::partition_tree::dict::{DictBuilderReader, KeyDictBuilder};
+use crate::memtable::partition_tree::metrics::WriteMetrics;
+use crate::memtable::partition_tree::partition::PrimaryKeyFilter;
+use crate::memtable::partition_tree::shard::Shard;
+use crate::memtable::partition_tree::{PartitionTreeConfig, PkId, PkIndex, ShardId};
+use crate::metrics::PARTITION_TREE_READ_STAGE_ELAPSED;
 
 /// Builder to write keys and data to a shard that the key dictionary
 /// is still active.
@@ -50,7 +50,7 @@ impl ShardBuilder {
     /// Returns a new builder.
     pub fn new(
         metadata: RegionMetadataRef,
-        config: &MergeTreeConfig,
+        config: &PartitionTreeConfig,
         shard_id: ShardId,
     ) -> ShardBuilder {
         ShardBuilder {
@@ -150,14 +150,14 @@ impl ShardBuilder {
     /// Scans the shard builder.
     pub fn read(&self, pk_weights_buffer: &mut Vec<u16>) -> Result<ShardBuilderReaderBuilder> {
         let dict_reader = {
-            let _timer = MERGE_TREE_READ_STAGE_ELAPSED
+            let _timer = PARTITION_TREE_READ_STAGE_ELAPSED
                 .with_label_values(&["shard_builder_read_pk"])
                 .start_timer();
             self.dict_builder.read()
         };
 
         {
-            let _timer = MERGE_TREE_READ_STAGE_ELAPSED
+            let _timer = PARTITION_TREE_READ_STAGE_ELAPSED
                 .with_label_values(&["sort_pk"])
                 .start_timer();
             dict_reader.pk_weights_to_sort_data(pk_weights_buffer);
@@ -296,7 +296,7 @@ impl ShardBuilderReader {
 impl Drop for ShardBuilderReader {
     fn drop(&mut self) {
         let shard_builder_prune_pk = self.prune_pk_cost.as_secs_f64();
-        MERGE_TREE_READ_STAGE_ELAPSED
+        PARTITION_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["shard_builder_prune_pk"])
             .observe(shard_builder_prune_pk);
         if self.keys_before_pruning > 0 {
@@ -315,8 +315,8 @@ impl Drop for ShardBuilderReader {
 mod tests {
 
     use super::*;
-    use crate::memtable::merge_tree::data::timestamp_array_to_i64_slice;
-    use crate::memtable::merge_tree::metrics::WriteMetrics;
+    use crate::memtable::partition_tree::data::timestamp_array_to_i64_slice;
+    use crate::memtable::partition_tree::metrics::WriteMetrics;
     use crate::memtable::KeyValues;
     use crate::test_util::memtable_util::{
         build_key_values_with_ts_seq_values, encode_key_by_kv, metadata_for_test,
@@ -355,7 +355,7 @@ mod tests {
     fn test_write_shard_builder() {
         let metadata = metadata_for_test();
         let input = input_with_key(&metadata);
-        let config = MergeTreeConfig::default();
+        let config = PartitionTreeConfig::default();
         let mut shard_builder = ShardBuilder::new(metadata.clone(), &config, 1);
         let mut metrics = WriteMetrics::default();
         assert!(shard_builder
@@ -382,7 +382,7 @@
     fn test_write_read_shard_builder() {
         let metadata = metadata_for_test();
         let input = input_with_key(&metadata);
-        let config = MergeTreeConfig::default();
+        let config = PartitionTreeConfig::default();
         let mut shard_builder = ShardBuilder::new(metadata.clone(), &config, 1);
         let mut metrics = WriteMetrics::default();
Some files were not shown because too many files have changed in this diff.