mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-27 08:29:59 +00:00
Compare commits
44 Commits
v0.1.0-alp
...
show
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3f6cbc378d | ||
|
|
9619940569 | ||
|
|
ed8252157a | ||
|
|
3e0fb7e75b | ||
|
|
ba3ce436df | ||
|
|
b31a6cb506 | ||
|
|
95090592f0 | ||
|
|
3a527c0fd5 | ||
|
|
819b60ca13 | ||
|
|
7169fe2989 | ||
|
|
b70672be77 | ||
|
|
a4c01f4a3a | ||
|
|
bd98a26cca | ||
|
|
1b4236d698 | ||
|
|
e8cc9b4b29 | ||
|
|
379f581780 | ||
|
|
ff6cfe8e70 | ||
|
|
5a397917c0 | ||
|
|
559880cb84 | ||
|
|
b76b27f3bf | ||
|
|
d4e0dc3685 | ||
|
|
b022556b79 | ||
|
|
bd065ea6e8 | ||
|
|
9a87f5edf8 | ||
|
|
e851b6d019 | ||
|
|
e7b92f24e8 | ||
|
|
4b8db408cf | ||
|
|
98659899c0 | ||
|
|
b1311801da | ||
|
|
f1b65d9b77 | ||
|
|
d5a2a26916 | ||
|
|
8e7e68708f | ||
|
|
9c1118b06d | ||
|
|
3fb93efbd0 | ||
|
|
3fd9c2f144 | ||
|
|
75e48c5f20 | ||
|
|
d402f83442 | ||
|
|
c5c6494e0b | ||
|
|
dc50095af3 | ||
|
|
8cd69f441e | ||
|
|
f52fc9b7d4 | ||
|
|
50d2685365 | ||
|
|
11d45e2918 | ||
|
|
30287e7e41 |
13
.github/pr-title-breaking-change-label-config.json
vendored
Normal file
13
.github/pr-title-breaking-change-label-config.json
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"LABEL": {
|
||||
"name": "breaking change",
|
||||
"color": "D93F0B"
|
||||
},
|
||||
"CHECKS": {
|
||||
"regexp": "^(?:(?!!:).)*$",
|
||||
"ignoreLabels": [
|
||||
"ignore-title"
|
||||
],
|
||||
"alwaysPassCI": true
|
||||
}
|
||||
}
|
||||
18
.github/pr-title-checker-config.json
vendored
18
.github/pr-title-checker-config.json
vendored
@@ -1,10 +1,12 @@
|
||||
{
|
||||
"LABEL": {
|
||||
"name": "Invalid PR Title",
|
||||
"color": "B60205"
|
||||
},
|
||||
"CHECKS": {
|
||||
"regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?:.*",
|
||||
"ignoreLabels" : ["ignore-title"]
|
||||
}
|
||||
"LABEL": {
|
||||
"name": "Invalid PR Title",
|
||||
"color": "B60205"
|
||||
},
|
||||
"CHECKS": {
|
||||
"regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?\\!?:.*",
|
||||
"ignoreLabels": [
|
||||
"ignore-title"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
42
.github/workflows/apidoc.yml
vendored
Normal file
42
.github/workflows/apidoc.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
paths-ignore:
|
||||
- 'docs/**'
|
||||
- 'config/**'
|
||||
- '**.md'
|
||||
- '.dockerignore'
|
||||
- 'docker/**'
|
||||
- '.gitignore'
|
||||
|
||||
name: Build API docs
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-02-26
|
||||
|
||||
jobs:
|
||||
apidoc:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
- run: cargo doc --workspace --no-deps --document-private-items
|
||||
- run: |
|
||||
cat <<EOF > target/doc/index.html
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="refresh" content="0; url='greptime/'" />
|
||||
</head>
|
||||
<body></body></html>
|
||||
EOF
|
||||
- name: Publish dist directory
|
||||
uses: JamesIves/github-pages-deploy-action@v4
|
||||
with:
|
||||
folder: target/doc
|
||||
14
.github/workflows/develop.yml
vendored
14
.github/workflows/develop.yml
vendored
@@ -24,7 +24,7 @@ on:
|
||||
name: CI
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-02-14
|
||||
RUST_TOOLCHAIN: nightly-2023-02-26
|
||||
|
||||
jobs:
|
||||
typos:
|
||||
@@ -116,6 +116,7 @@ jobs:
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-latest-8-cores
|
||||
timeout-minutes: 60
|
||||
needs: [clippy]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
@@ -131,7 +132,7 @@ jobs:
|
||||
ETCD_VER=v3.5.7
|
||||
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
|
||||
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
|
||||
mkdir -p /tmp/etcd-download
|
||||
mkdir -p /tmp/etcd-download
|
||||
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
|
||||
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
|
||||
|
||||
@@ -188,6 +189,7 @@ jobs:
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-latest-8-cores
|
||||
timeout-minutes: 60
|
||||
needs: [clippy]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/setup-protoc@v1
|
||||
@@ -205,10 +207,16 @@ jobs:
|
||||
uses: Swatinem/rust-cache@v2
|
||||
- name: Install latest nextest release
|
||||
uses: taiki-e/install-action@nextest
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install PyArrow Package
|
||||
run: pip install pyarrow
|
||||
- name: Install cargo-llvm-cov
|
||||
uses: taiki-e/install-action@cargo-llvm-cov
|
||||
- name: Collect coverage data
|
||||
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info
|
||||
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend
|
||||
env:
|
||||
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
9
.github/workflows/pr-title-checker.yml
vendored
9
.github/workflows/pr-title-checker.yml
vendored
@@ -18,3 +18,12 @@ jobs:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
pass_on_octokit_error: false
|
||||
configuration_path: ".github/pr-title-checker-config.json"
|
||||
breaking:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: thehanimo/pr-title-checker@v1.3.4
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
pass_on_octokit_error: false
|
||||
configuration_path: ".github/pr-title-breaking-change-label-config.json"
|
||||
|
||||
49
.github/workflows/release.yml
vendored
49
.github/workflows/release.yml
vendored
@@ -10,7 +10,7 @@ on:
|
||||
name: Release
|
||||
|
||||
env:
|
||||
RUST_TOOLCHAIN: nightly-2023-02-14
|
||||
RUST_TOOLCHAIN: nightly-2023-02-26
|
||||
|
||||
# FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
|
||||
SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha
|
||||
@@ -30,15 +30,15 @@ jobs:
|
||||
- arch: x86_64-unknown-linux-gnu
|
||||
os: ubuntu-2004-16-cores
|
||||
file: greptime-linux-amd64
|
||||
- arch: aarch64-unknown-linux-gnu
|
||||
os: ubuntu-2004-16-cores
|
||||
file: greptime-linux-arm64
|
||||
- arch: aarch64-apple-darwin
|
||||
os: macos-latest
|
||||
file: greptime-darwin-arm64
|
||||
- arch: x86_64-apple-darwin
|
||||
os: macos-latest
|
||||
file: greptime-darwin-amd64
|
||||
# - arch: aarch64-unknown-linux-gnu
|
||||
# os: ubuntu-2004-16-cores
|
||||
# file: greptime-linux-arm64
|
||||
# - arch: aarch64-apple-darwin
|
||||
# os: macos-latest
|
||||
# file: greptime-darwin-arm64
|
||||
# - arch: x86_64-apple-darwin
|
||||
# os: macos-latest
|
||||
# file: greptime-darwin-amd64
|
||||
runs-on: ${{ matrix.os }}
|
||||
if: github.repository == 'GreptimeTeam/greptimedb'
|
||||
steps:
|
||||
@@ -76,10 +76,10 @@ jobs:
|
||||
ETCD_VER=v3.5.7
|
||||
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
|
||||
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
|
||||
mkdir -p /tmp/etcd-download
|
||||
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
|
||||
mkdir -p /tmp/etcd-download
|
||||
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
|
||||
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
|
||||
|
||||
|
||||
sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
|
||||
nohup etcd >/tmp/etcd.log 2>&1 &
|
||||
|
||||
@@ -193,17 +193,17 @@ jobs:
|
||||
tar xvf greptime-linux-amd64.tgz
|
||||
rm greptime-linux-amd64.tgz
|
||||
|
||||
- name: Download arm64 binary
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: greptime-linux-arm64
|
||||
path: arm64
|
||||
# - name: Download arm64 binary
|
||||
# uses: actions/download-artifact@v3
|
||||
# with:
|
||||
# name: greptime-linux-arm64
|
||||
# path: arm64
|
||||
|
||||
- name: Unzip the arm64 artifacts
|
||||
run: |
|
||||
cd arm64
|
||||
tar xvf greptime-linux-arm64.tgz
|
||||
rm greptime-linux-arm64.tgz
|
||||
# - name: Unzip the arm64 artifacts
|
||||
# run: |
|
||||
# cd arm64
|
||||
# tar xvf greptime-linux-arm64.tgz
|
||||
# rm greptime-linux-arm64.tgz
|
||||
|
||||
- name: Login to UCloud Container Registry
|
||||
uses: docker/login-action@v2
|
||||
@@ -245,7 +245,8 @@ jobs:
|
||||
context: .
|
||||
file: ./docker/ci/Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
# platforms: linux/amd64,linux/arm64
|
||||
platforms: linux/amd64
|
||||
tags: |
|
||||
greptime/greptimedb:latest
|
||||
greptime/greptimedb:${{ env.IMAGE_TAG }}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Welcome!
|
||||
# Welcome 👋
|
||||
|
||||
Thanks a lot for considering contributing to GreptimeDB. We believe people like you would make GreptimeDB a great product. We intend to build a community where individuals can have open talks, show respect for one another, and speak with true ❤️. Meanwhile, we are to keep transparency and make your effort count here.
|
||||
|
||||
@@ -50,34 +50,33 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
|
||||
|
||||
- To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
|
||||
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
|
||||
- Make sure all unit tests are passed.
|
||||
- Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
|
||||
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
|
||||
|
||||
#### `pre-commit` Hooks
|
||||
|
||||
You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.
|
||||
|
||||
1. Install `pre-commit`
|
||||
```
|
||||
$ pip install pre-commit
|
||||
```
|
||||
or
|
||||
```
|
||||
$ brew install pre-commit
|
||||
```
|
||||
|
||||
pip install pre-commit
|
||||
|
||||
or
|
||||
|
||||
brew install pre-commit
|
||||
|
||||
2. Install the `pre-commit` hooks
|
||||
```
|
||||
$ pre-commit install
|
||||
pre-commit installed at .git/hooks/pre-commit
|
||||
|
||||
$ pre-commit install --hook-type commit-msg
|
||||
pre-commit installed at .git/hooks/commit-msg
|
||||
$ pre-commit install
|
||||
pre-commit installed at .git/hooks/pre-commit
|
||||
|
||||
$ pre-commit install --hook-type pre-push
|
||||
pre-commit installed at .git/hooks/pre-pus
|
||||
```
|
||||
$ pre-commit install --hook-type commit-msg
|
||||
pre-commit installed at .git/hooks/commit-msg
|
||||
|
||||
now `pre-commit` will run automatically on `git commit`.
|
||||
$ pre-commit install --hook-type pre-push
|
||||
pre-commit installed at .git/hooks/pre-push
|
||||
|
||||
Now, `pre-commit` will run automatically on `git commit`.
|
||||
|
||||
### Title
|
||||
|
||||
@@ -102,10 +101,12 @@ of what you were trying to do and what went wrong. You can also reach for help i
|
||||
## Community
|
||||
|
||||
The core team will be thrilled if you participate in any way you like. When you are stuck, try ask for help by filing an issue, with a detailed description of what you were trying to do and what went wrong. If you have any questions or if you would like to get involved in our community, please check out:
|
||||
|
||||
- [GreptimeDB Community Slack](https://greptime.com/slack)
|
||||
- [GreptimeDB Github Discussions](https://github.com/GreptimeTeam/greptimedb/discussions)
|
||||
|
||||
Also, see some extra GreptimeDB content:
|
||||
|
||||
- [GreptimeDB Docs](https://greptime.com/docs)
|
||||
- [Learn GreptimeDB](https://greptime.com/products/db)
|
||||
- [Greptime Inc. Website](https://greptime.com)
|
||||
|
||||
550
Cargo.lock
generated
550
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -12,12 +12,14 @@ members = [
|
||||
"src/common/function-macro",
|
||||
"src/common/grpc",
|
||||
"src/common/grpc-expr",
|
||||
"src/common/mem-prof",
|
||||
"src/common/procedure",
|
||||
"src/common/query",
|
||||
"src/common/recordbatch",
|
||||
"src/common/runtime",
|
||||
"src/common/substrait",
|
||||
"src/common/telemetry",
|
||||
"src/common/test-util",
|
||||
"src/common/time",
|
||||
"src/datanode",
|
||||
"src/datatypes",
|
||||
@@ -48,29 +50,29 @@ edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
arrow = "29.0"
|
||||
arrow-array = "29.0"
|
||||
arrow-flight = "29.0"
|
||||
arrow-schema = { version = "29.0", features = ["serde"] }
|
||||
arrow = { version = "33.0", features = ["pyarrow"] }
|
||||
arrow-array = "33.0"
|
||||
arrow-flight = "33.0"
|
||||
arrow-schema = { version = "33.0", features = ["serde"] }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
# TODO(LFC): Use released Datafusion when it officially dependent on Arrow 29.0
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
|
||||
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
|
||||
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
parquet = "29.0"
|
||||
parquet = "33.0"
|
||||
paste = "1.0"
|
||||
prost = "0.11"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
sqlparser = "0.28"
|
||||
sqlparser = "0.30"
|
||||
tempfile = "3"
|
||||
tokio = { version = "1.24.2", features = ["full"] }
|
||||
tokio-util = "0.7"
|
||||
tonic = { version = "0.8", features = ["tls"] }
|
||||
|
||||
@@ -61,6 +61,12 @@ To compile GreptimeDB from source, you'll need:
|
||||
find an installation instructions [here](https://grpc.io/docs/protoc-installation/).
|
||||
**Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
|
||||
keyword. You can check it with `protoc --version`.
|
||||
- python3-dev or python3-devel(Optional, only needed if you want to run scripts
|
||||
in cpython): this install a Python shared library required for running python
|
||||
scripting engine(In CPython Mode). This is available as `python3-dev` on
|
||||
ubuntu, you can install it with `sudo apt install python3-dev`, or
|
||||
`python3-devel` on RPM based distributions (e.g. Fedora, Red Hat, SuSE). Mac's
|
||||
`Python3` package should have this shared library by default.
|
||||
|
||||
#### Build with Docker
|
||||
|
||||
|
||||
@@ -208,6 +208,7 @@ fn build_values(column: &ArrayRef) -> Values {
|
||||
| DataType::Dictionary(_, _)
|
||||
| DataType::Decimal128(_, _)
|
||||
| DataType::Decimal256(_, _)
|
||||
| DataType::RunEndEncoded(_, _)
|
||||
| DataType::Map(_, _) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +1,52 @@
|
||||
node_id = 42
|
||||
mode = 'distributed'
|
||||
rpc_addr = '127.0.0.1:3001'
|
||||
rpc_hostname = '127.0.0.1'
|
||||
rpc_runtime_size = 8
|
||||
mysql_addr = '127.0.0.1:4406'
|
||||
mysql_runtime_size = 4
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
# Whether to use in-memory catalog, see `standalone.example.toml`.
|
||||
enable_memory_catalog = false
|
||||
# The datanode identifier, should be unique.
|
||||
node_id = 42
|
||||
# gRPC server address, "127.0.0.1:3001" by default.
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
# Hostname of this node.
|
||||
rpc_hostname = "127.0.0.1"
|
||||
# The number of gRPC server worker threads, 8 by default.
|
||||
rpc_runtime_size = 8
|
||||
# MySQL server address, "127.0.0.1:4406" by default.
|
||||
mysql_addr = "127.0.0.1:4406"
|
||||
# The number of MySQL server worker threads, 2 by default.
|
||||
mysql_runtime_size = 2
|
||||
|
||||
# Metasrv client options.
|
||||
[meta_client_options]
|
||||
# Metasrv address list.
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
# Operation timeout in milliseconds, 3000 by default.
|
||||
timeout_millis = 3000
|
||||
# Connect server timeout in milliseconds, 5000 by default.
|
||||
connect_timeout_millis = 5000
|
||||
# `TCP_NODELAY` option for accepted connections, true by default.
|
||||
tcp_nodelay = true
|
||||
|
||||
# WAL options, see `standalone.example.toml`.
|
||||
[wal]
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
file_size = '1GB'
|
||||
purge_interval = '10m'
|
||||
purge_threshold = '50GB'
|
||||
file_size = "1GB"
|
||||
purge_threshold = "50GB"
|
||||
purge_interval = "10m"
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
# Storage options, see `standalone.example.toml`.
|
||||
[storage]
|
||||
type = 'File'
|
||||
data_dir = '/tmp/greptimedb/data/'
|
||||
|
||||
[meta_client_options]
|
||||
metasrv_addrs = ['127.0.0.1:3002']
|
||||
timeout_millis = 3000
|
||||
connect_timeout_millis = 5000
|
||||
tcp_nodelay = false
|
||||
type = "File"
|
||||
data_dir = "/tmp/greptimedb/data/"
|
||||
|
||||
# Compaction options, see `standalone.example.toml`.
|
||||
[compaction]
|
||||
max_inflight_tasks = 4
|
||||
max_files_in_level0 = 16
|
||||
max_files_in_level0 = 8
|
||||
max_purge_tasks = 32
|
||||
|
||||
[procedure.store]
|
||||
type = 'File'
|
||||
data_dir = '/tmp/greptimedb/procedure/'
|
||||
# Procedure storage options, see `standalone.example.toml`.
|
||||
# [procedure.store]
|
||||
# type = 'File'
|
||||
# data_dir = '/tmp/greptimedb/procedure/'
|
||||
|
||||
@@ -1,12 +1,58 @@
|
||||
mode = 'distributed'
|
||||
datanode_rpc_addr = '127.0.0.1:3001'
|
||||
# Node running mode, see `standalone.example.toml`.
|
||||
mode = "distributed"
|
||||
|
||||
# HTTP server options, see `standalone.example.toml`.
|
||||
[http_options]
|
||||
addr = '127.0.0.1:4000'
|
||||
addr = "127.0.0.1:4000"
|
||||
timeout = "30s"
|
||||
|
||||
# gRPC server options, see `standalone.example.toml`.
|
||||
[grpc_options]
|
||||
addr = "127.0.0.1:4001"
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options, see `standalone.example.toml`.
|
||||
[mysql_options]
|
||||
addr = "127.0.0.1:4002"
|
||||
runtime_size = 2
|
||||
|
||||
# MySQL server TLS options, see `standalone.example.toml`.
|
||||
[mysql_options.tls]
|
||||
mode = "disable"
|
||||
cert_path = ""
|
||||
key_path = ""
|
||||
|
||||
# PostgresSQL server options, see `standalone.example.toml`.
|
||||
[postgres_options]
|
||||
addr = "127.0.0.1:4003"
|
||||
runtime_size = 2
|
||||
|
||||
# PostgresSQL server TLS options, see `standalone.example.toml`.
|
||||
[postgres_options.tls]
|
||||
mode = "disable"
|
||||
cert_path = ""
|
||||
key_path = ""
|
||||
|
||||
# OpenTSDB protocol options, see `standalone.example.toml`.
|
||||
[opentsdb_options]
|
||||
addr = "127.0.0.1:4242"
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options, see `standalone.example.toml`.
|
||||
[influxdb_options]
|
||||
enable = true
|
||||
|
||||
# Prometheus protocol options, see `standalone.example.toml`.
|
||||
[prometheus_options]
|
||||
enable = true
|
||||
|
||||
# Prometheus protocol options, see `standalone.example.toml`.
|
||||
[prom_options]
|
||||
addr = "127.0.0.1:4004"
|
||||
|
||||
# Metasrv client options, see `datanode.example.toml`.
|
||||
[meta_client_options]
|
||||
metasrv_addrs = ['127.0.0.1:3002']
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
timeout_millis = 3000
|
||||
connect_timeout_millis = 5000
|
||||
tcp_nodelay = false
|
||||
tcp_nodelay = true
|
||||
|
||||
@@ -1,6 +1,15 @@
|
||||
bind_addr = '127.0.0.1:3002'
|
||||
server_addr = '127.0.0.1:3002'
|
||||
store_addr = '127.0.0.1:2379'
|
||||
# The bind address of metasrv, "127.0.0.1:3002" by default.
|
||||
bind_addr = "127.0.0.1:3002"
|
||||
# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
|
||||
server_addr = "127.0.0.1:3002"
|
||||
# Etcd server address, "127.0.0.1:2379" by default.
|
||||
store_addr = "127.0.0.1:2379"
|
||||
# Datanode lease in seconds, 15 seconds by default.
|
||||
datanode_lease_secs = 15
|
||||
# selector: 'LeaseBased', 'LoadBased'
|
||||
selector = 'LeaseBased'
|
||||
# Datanode selector type.
|
||||
# - "LeaseBased" (default value).
|
||||
# - "LoadBased"
|
||||
# For details, please see "https://docs.greptime.com/developer-guide/meta/selector".
|
||||
selector = "LeaseBased"
|
||||
# Store data in memory, false by default.
|
||||
use_memory_store = false
|
||||
|
||||
@@ -1,47 +1,116 @@
|
||||
node_id = 0
|
||||
mode = 'standalone'
|
||||
# Node running mode, "standalone" or "distributed".
|
||||
mode = "standalone"
|
||||
# Whether to use in-memory catalog, `false` by default.
|
||||
enable_memory_catalog = false
|
||||
|
||||
# HTTP server options.
|
||||
[http_options]
|
||||
addr = '127.0.0.1:4000'
|
||||
# Server address, "127.0.0.1:4000" by default.
|
||||
addr = "127.0.0.1:4000"
|
||||
# HTTP request timeout, 30s by default.
|
||||
timeout = "30s"
|
||||
|
||||
[wal]
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
file_size = '1GB'
|
||||
purge_interval = '10m'
|
||||
purge_threshold = '50GB'
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
[storage]
|
||||
type = 'File'
|
||||
data_dir = '/tmp/greptimedb/data/'
|
||||
|
||||
# gRPC server options.
|
||||
[grpc_options]
|
||||
addr = '127.0.0.1:4001'
|
||||
# Server address, "127.0.0.1:4001" by default.
|
||||
addr = "127.0.0.1:4001"
|
||||
# The number of server worker threads, 8 by default.
|
||||
runtime_size = 8
|
||||
|
||||
# MySQL server options.
|
||||
[mysql_options]
|
||||
addr = '127.0.0.1:4002'
|
||||
# Server address, "127.0.0.1:4002" by default.
|
||||
addr = "127.0.0.1:4002"
|
||||
# The number of server worker threads, 2 by default.
|
||||
runtime_size = 2
|
||||
|
||||
[influxdb_options]
|
||||
enable = true
|
||||
|
||||
[opentsdb_options]
|
||||
addr = '127.0.0.1:4242'
|
||||
enable = true
|
||||
runtime_size = 2
|
||||
|
||||
[prometheus_options]
|
||||
enable = true
|
||||
# MySQL server TLS options.
|
||||
[mysql_options.tls]
|
||||
# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
# - "disable" (default value)
|
||||
# - "prefer"
|
||||
# - "require"
|
||||
# - "verify-ca"
|
||||
# - "verify-full"
|
||||
mode = "disable"
|
||||
# Certificate file path.
|
||||
cert_path = ""
|
||||
# Private key file path.
|
||||
key_path = ""
|
||||
|
||||
# PostgresSQL server options.
|
||||
[postgres_options]
|
||||
addr = '127.0.0.1:4003'
|
||||
# Server address, "127.0.0.1:4003" by default.
|
||||
addr = "127.0.0.1:4003"
|
||||
# The number of server worker threads, 2 by default.
|
||||
runtime_size = 2
|
||||
check_pwd = false
|
||||
|
||||
[procedure.store]
|
||||
type = 'File'
|
||||
data_dir = '/tmp/greptimedb/procedure/'
|
||||
# PostgresSQL server TLS options, see `[mysql_options.tls]` section.
|
||||
[postgres_options.tls]
|
||||
# TLS mode.
|
||||
mode = "disable"
|
||||
# certificate file path.
|
||||
cert_path = ""
|
||||
# private key file path.
|
||||
key_path = ""
|
||||
|
||||
# OpenTSDB protocol options.
|
||||
[opentsdb_options]
|
||||
# OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
|
||||
addr = "127.0.0.1:4242"
|
||||
# The number of server worker threads, 2 by default.
|
||||
runtime_size = 2
|
||||
|
||||
# InfluxDB protocol options.
|
||||
[influxdb_options]
|
||||
# Whether to enable InfluxDB protocol in HTTP API, true by default.
|
||||
enable = true
|
||||
|
||||
# Prometheus protocol options.
|
||||
[prometheus_options]
|
||||
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
|
||||
enable = true
|
||||
|
||||
# Prom protocol options.
|
||||
[prom_options]
|
||||
# Prometheus API server address, "127.0.0.1:4004" by default.
|
||||
addr = "127.0.0.1:4004"
|
||||
|
||||
# WAL options.
|
||||
[wal]
|
||||
# WAL data directory.
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
# WAL file size in bytes.
|
||||
file_size = "1GB"
|
||||
# WAL purge threshold in bytes.
|
||||
purge_threshold = "50GB"
|
||||
# WAL purge interval in seconds.
|
||||
purge_interval = "10m"
|
||||
# WAL read batch size.
|
||||
read_batch_size = 128
|
||||
# Whether to sync log file after every write.
|
||||
sync_write = false
|
||||
|
||||
# Storage options.
|
||||
[storage]
|
||||
# Storage type.
|
||||
type = "File"
|
||||
# Data directory, "/tmp/greptimedb/data" by default.
|
||||
data_dir = "/tmp/greptimedb/data/"
|
||||
|
||||
# Compaction options.
|
||||
[compaction]
|
||||
# Max task number that can concurrently run.
|
||||
max_inflight_tasks = 4
|
||||
# Max files in level 0 to trigger compaction.
|
||||
max_files_in_level0 = 8
|
||||
# Max task number for SST purge task after compaction.
|
||||
max_purge_tasks = 32
|
||||
|
||||
# Procedure storage options.
|
||||
# Uncomment to enable.
|
||||
# [procedure.store]
|
||||
# # Storage type.
|
||||
# type = "File"
|
||||
# # Procedure data path.
|
||||
# data_dir = "/tmp/greptimedb/procedure/"
|
||||
|
||||
@@ -9,7 +9,10 @@ RUN apt-get update && apt-get install -y \
|
||||
protobuf-compiler \
|
||||
curl \
|
||||
build-essential \
|
||||
pkg-config
|
||||
pkg-config \
|
||||
python3 \
|
||||
python3-dev \
|
||||
&& pip install pyarrow
|
||||
|
||||
# Install Rust.
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2023-02-14"
|
||||
channel = "nightly-2023-02-26"
|
||||
|
||||
@@ -10,7 +10,7 @@ common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datatypes = { path = "../datatypes" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1599ae2a0d1d8f42ee23ed26e4ad7a7b34134c60" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ad0187295035e83f76272da553453e649b7570de" }
|
||||
prost.workspace = true
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tonic.workspace = true
|
||||
|
||||
@@ -18,25 +18,28 @@ common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
dashmap = "5.4"
|
||||
datafusion.workspace = true
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
lazy_static = "1.4"
|
||||
meta-client = { path = "../meta-client" }
|
||||
parking_lot = "0.12"
|
||||
regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
session = { path = "../session" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
storage = { path = "../storage" }
|
||||
table = { path = "../table" }
|
||||
tokio.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util = { path = "../common/test-util" }
|
||||
chrono.workspace = true
|
||||
log-store = { path = "../log-store" }
|
||||
mito = { path = "../mito", features = ["test"] }
|
||||
object-store = { path = "../object-store" }
|
||||
storage = { path = "../storage" }
|
||||
tempdir = "0.3"
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -201,6 +201,9 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: common_catalog::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
|
||||
QueryAccessDenied { catalog: String, schema: String },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -246,6 +249,7 @@ impl ErrorExt for Error {
|
||||
}
|
||||
|
||||
Error::Unimplemented { .. } => StatusCode::Unsupported,
|
||||
Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ pub mod local;
|
||||
pub mod remote;
|
||||
pub mod schema;
|
||||
pub mod system;
|
||||
pub mod table_source;
|
||||
pub mod tables;
|
||||
|
||||
/// Represent a list of named catalogs
|
||||
@@ -107,7 +108,12 @@ pub trait CatalogManager: CatalogList {
|
||||
fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>>;
|
||||
|
||||
/// Returns the table by catalog, schema and table name.
|
||||
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
|
||||
async fn table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
) -> Result<Option<TableRef>>;
|
||||
}
|
||||
|
||||
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
|
||||
@@ -186,7 +192,8 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
|
||||
let table_name = &req.create_table_request.table_name;
|
||||
let table_id = req.create_table_request.id;
|
||||
|
||||
let table = if let Some(table) = manager.table(catalog_name, schema_name, table_name)? {
|
||||
let table = manager.table(catalog_name, schema_name, table_name).await?;
|
||||
let table = if let Some(table) = table {
|
||||
table
|
||||
} else {
|
||||
let table = engine
|
||||
@@ -219,7 +226,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
|
||||
}
|
||||
|
||||
/// The number of regions in the datanode node.
|
||||
pub fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
|
||||
pub async fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
|
||||
let mut region_number: u64 = 0;
|
||||
|
||||
for catalog_name in catalog_manager.catalog_names()? {
|
||||
@@ -239,11 +246,13 @@ pub fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
|
||||
})?;
|
||||
|
||||
for table_name in schema.table_names()? {
|
||||
let table = schema
|
||||
.table(&table_name)?
|
||||
.context(error::TableNotFoundSnafu {
|
||||
table_info: &table_name,
|
||||
})?;
|
||||
let table =
|
||||
schema
|
||||
.table(&table_name)
|
||||
.await?
|
||||
.context(error::TableNotFoundSnafu {
|
||||
table_info: &table_name,
|
||||
})?;
|
||||
|
||||
let region_numbers = &table.table_info().meta.region_numbers;
|
||||
region_number += region_numbers.len() as u64;
|
||||
|
||||
@@ -345,7 +345,7 @@ impl CatalogManager for LocalCatalogManager {
|
||||
|
||||
{
|
||||
let _lock = self.register_lock.lock().await;
|
||||
if let Some(existing) = schema.table(&request.table_name)? {
|
||||
if let Some(existing) = schema.table(&request.table_name).await? {
|
||||
if existing.table_info().ident.table_id != request.table_id {
|
||||
error!(
|
||||
"Unexpected table register request: {:?}, existing: {:?}",
|
||||
@@ -434,9 +434,10 @@ impl CatalogManager for LocalCatalogManager {
|
||||
} = &request;
|
||||
let table_id = self
|
||||
.catalogs
|
||||
.table(catalog, schema, table_name)?
|
||||
.table(catalog, schema, table_name)
|
||||
.await?
|
||||
.with_context(|| error::TableNotExistSnafu {
|
||||
table: format!("{catalog}.{schema}.{table_name}"),
|
||||
table: format_full_table_name(catalog, schema, table_name),
|
||||
})?
|
||||
.table_info()
|
||||
.ident
|
||||
@@ -505,7 +506,7 @@ impl CatalogManager for LocalCatalogManager {
|
||||
.schema(schema)
|
||||
}
|
||||
|
||||
fn table(
|
||||
async fn table(
|
||||
&self,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
@@ -521,7 +522,7 @@ impl CatalogManager for LocalCatalogManager {
|
||||
catalog: catalog_name,
|
||||
schema: schema_name,
|
||||
})?;
|
||||
schema.table(table_name)
|
||||
schema.table(table_name).await
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::MIN_USER_TABLE_ID;
|
||||
use common_telemetry::error;
|
||||
use snafu::{ensure, OptionExt};
|
||||
@@ -155,16 +156,20 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
}
|
||||
}
|
||||
|
||||
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>> {
|
||||
let c = self.catalogs.read().unwrap();
|
||||
let catalog = if let Some(c) = c.get(catalog) {
|
||||
async fn table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
) -> Result<Option<TableRef>> {
|
||||
let catalog = {
|
||||
let c = self.catalogs.read().unwrap();
|
||||
let Some(c) = c.get(catalog) else { return Ok(None) };
|
||||
c.clone()
|
||||
} else {
|
||||
return Ok(None);
|
||||
};
|
||||
match catalog.schema(schema)? {
|
||||
None => Ok(None),
|
||||
Some(s) => s.table(table_name),
|
||||
Some(s) => s.table(table_name).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -283,6 +288,7 @@ impl Default for MemorySchemaProvider {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SchemaProvider for MemorySchemaProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -293,7 +299,7 @@ impl SchemaProvider for MemorySchemaProvider {
|
||||
Ok(tables.keys().cloned().collect())
|
||||
}
|
||||
|
||||
fn table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
let tables = self.tables.read().unwrap();
|
||||
Ok(tables.get(name).cloned())
|
||||
}
|
||||
@@ -355,8 +361,8 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_new_memory_catalog_list() {
|
||||
#[tokio::test]
|
||||
async fn test_new_memory_catalog_list() {
|
||||
let catalog_list = new_memory_catalog_list().unwrap();
|
||||
let default_catalog = catalog_list.catalog(DEFAULT_CATALOG_NAME).unwrap().unwrap();
|
||||
|
||||
@@ -369,9 +375,9 @@ mod tests {
|
||||
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
|
||||
.unwrap();
|
||||
|
||||
let table = default_schema.table("numbers").unwrap();
|
||||
let table = default_schema.table("numbers").await.unwrap();
|
||||
assert!(table.is_some());
|
||||
assert!(default_schema.table("not_exists").unwrap().is_none());
|
||||
assert!(default_schema.table("not_exists").await.unwrap().is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -419,7 +425,7 @@ mod tests {
|
||||
|
||||
// test new table name exists
|
||||
assert!(provider.table_exist(new_table_name).unwrap());
|
||||
let registered_table = provider.table(new_table_name).unwrap().unwrap();
|
||||
let registered_table = provider.table(new_table_name).await.unwrap().unwrap();
|
||||
assert_eq!(
|
||||
registered_table.table_info().ident.table_id,
|
||||
test_table.table_info().ident.table_id
|
||||
@@ -468,6 +474,7 @@ mod tests {
|
||||
|
||||
let registered_table = catalog
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(registered_table.table_info().ident.table_id, table_id);
|
||||
|
||||
@@ -13,16 +13,19 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use async_stream::stream;
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
use common_telemetry::{debug, info};
|
||||
use common_telemetry::{debug, error, info};
|
||||
use dashmap::DashMap;
|
||||
use futures::Stream;
|
||||
use futures_util::StreamExt;
|
||||
use parking_lot::RwLock;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
use table::metadata::TableId;
|
||||
@@ -38,6 +41,7 @@ use crate::error::{
|
||||
use crate::helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
CATALOG_KEY_PREFIX,
|
||||
};
|
||||
use crate::remote::{Kv, KvBackendRef};
|
||||
use crate::{
|
||||
@@ -50,10 +54,9 @@ use crate::{
|
||||
pub struct RemoteCatalogManager {
|
||||
node_id: u64,
|
||||
backend: KvBackendRef,
|
||||
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
|
||||
catalogs: Arc<RwLock<DashMap<String, CatalogProviderRef>>>,
|
||||
engine: TableEngineRef,
|
||||
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
|
||||
mutex: Arc<Mutex<()>>,
|
||||
}
|
||||
|
||||
impl RemoteCatalogManager {
|
||||
@@ -64,7 +67,6 @@ impl RemoteCatalogManager {
|
||||
backend,
|
||||
catalogs: Default::default(),
|
||||
system_table_requests: Default::default(),
|
||||
mutex: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,9 +110,13 @@ impl RemoteCatalogManager {
|
||||
debug!("Ignoring non-catalog key: {}", String::from_utf8_lossy(&k));
|
||||
continue;
|
||||
}
|
||||
let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
yield Ok(key)
|
||||
|
||||
let catalog_key = String::from_utf8_lossy(&k);
|
||||
if let Ok(key) = CatalogKey::parse(&catalog_key) {
|
||||
yield Ok(key)
|
||||
} else {
|
||||
error!("Invalid catalog key: {:?}", catalog_key);
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
@@ -381,7 +387,14 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
"Initialized catalogs: {:?}",
|
||||
catalogs.keys().cloned().collect::<Vec<_>>()
|
||||
);
|
||||
self.catalogs.store(Arc::new(catalogs));
|
||||
|
||||
{
|
||||
let self_catalogs = self.catalogs.read();
|
||||
catalogs.into_iter().for_each(|(k, v)| {
|
||||
self_catalogs.insert(k, v);
|
||||
});
|
||||
}
|
||||
|
||||
info!("Max table id allocated: {}", max_table_id);
|
||||
|
||||
let mut system_table_requests = self.system_table_requests.lock().await;
|
||||
@@ -468,7 +481,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
.schema(schema)
|
||||
}
|
||||
|
||||
fn table(
|
||||
async fn table(
|
||||
&self,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
@@ -483,7 +496,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
catalog: catalog_name,
|
||||
schema: schema_name,
|
||||
})?;
|
||||
schema.table(table_name)
|
||||
schema.table(table_name).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -499,12 +512,10 @@ impl CatalogList for RemoteCatalogManager {
|
||||
) -> Result<Option<CatalogProviderRef>> {
|
||||
let key = self.build_catalog_key(&name).to_string();
|
||||
let backend = self.backend.clone();
|
||||
let mutex = self.mutex.clone();
|
||||
let catalogs = self.catalogs.clone();
|
||||
|
||||
std::thread::spawn(|| {
|
||||
common_runtime::block_on_write(async move {
|
||||
let _guard = mutex.lock().await;
|
||||
backend
|
||||
.set(
|
||||
key.as_bytes(),
|
||||
@@ -513,11 +524,10 @@ impl CatalogList for RemoteCatalogManager {
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
let prev_catalogs = catalogs.load();
|
||||
let mut new_catalogs = HashMap::with_capacity(prev_catalogs.len() + 1);
|
||||
new_catalogs.clone_from(&prev_catalogs);
|
||||
let prev = new_catalogs.insert(name, catalog);
|
||||
catalogs.store(Arc::new(new_catalogs));
|
||||
|
||||
let catalogs = catalogs.read();
|
||||
let prev = catalogs.insert(name, catalog.clone());
|
||||
|
||||
Ok(prev)
|
||||
})
|
||||
})
|
||||
@@ -527,12 +537,65 @@ impl CatalogList for RemoteCatalogManager {
|
||||
|
||||
/// List all catalogs from metasrv
|
||||
fn catalog_names(&self) -> Result<Vec<String>> {
|
||||
Ok(self.catalogs.load().keys().cloned().collect::<Vec<_>>())
|
||||
let catalogs = self.catalogs.read();
|
||||
Ok(catalogs.iter().map(|k| k.key().to_string()).collect())
|
||||
}
|
||||
|
||||
/// Read catalog info of given name from metasrv.
|
||||
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
|
||||
Ok(self.catalogs.load().get(name).cloned())
|
||||
{
|
||||
let catalogs = self.catalogs.read();
|
||||
let catalog = catalogs.get(name);
|
||||
|
||||
if let Some(catalog) = catalog {
|
||||
return Ok(Some(catalog.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
let catalogs = self.catalogs.write();
|
||||
|
||||
let catalog = catalogs.get(name);
|
||||
if let Some(catalog) = catalog {
|
||||
return Ok(Some(catalog.clone()));
|
||||
}
|
||||
|
||||
// It's for lack of incremental catalog syncing between datanode and meta. Here we fetch catalog
|
||||
// from meta on demand. This can be removed when incremental catalog syncing is done in datanode.
|
||||
|
||||
let backend = self.backend.clone();
|
||||
|
||||
let catalogs_from_meta: HashSet<String> = std::thread::spawn(|| {
|
||||
common_runtime::block_on_read(async move {
|
||||
let mut stream = backend.range(CATALOG_KEY_PREFIX.as_bytes());
|
||||
let mut catalogs = HashSet::new();
|
||||
|
||||
while let Some(catalog) = stream.next().await {
|
||||
if let Ok(catalog) = catalog {
|
||||
let catalog_key = String::from_utf8_lossy(&catalog.0);
|
||||
|
||||
if let Ok(key) = CatalogKey::parse(&catalog_key) {
|
||||
catalogs.insert(key.catalog_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
catalogs
|
||||
})
|
||||
})
|
||||
.join()
|
||||
.unwrap();
|
||||
|
||||
catalogs.retain(|catalog_name, _| catalogs_from_meta.get(catalog_name).is_some());
|
||||
|
||||
for catalog in catalogs_from_meta {
|
||||
catalogs
|
||||
.entry(catalog.clone())
|
||||
.or_insert(self.new_catalog_provider(&catalog));
|
||||
}
|
||||
|
||||
let catalog = catalogs.get(name);
|
||||
|
||||
Ok(catalog.as_deref().cloned())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -692,6 +755,7 @@ impl RemoteSchemaProvider {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SchemaProvider for RemoteSchemaProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -701,7 +765,7 @@ impl SchemaProvider for RemoteSchemaProvider {
|
||||
Ok(self.tables.load().keys().cloned().collect::<Vec<_>>())
|
||||
}
|
||||
|
||||
fn table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
Ok(self.tables.load().get(name).cloned())
|
||||
}
|
||||
|
||||
|
||||
@@ -15,11 +15,13 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
/// Represents a schema, comprising a number of named tables.
|
||||
#[async_trait]
|
||||
pub trait SchemaProvider: Sync + Send {
|
||||
/// Returns the schema provider as [`Any`](std::any::Any)
|
||||
/// so that it can be downcast to a specific implementation.
|
||||
@@ -29,7 +31,7 @@ pub trait SchemaProvider: Sync + Send {
|
||||
fn table_names(&self) -> Result<Vec<String>>;
|
||||
|
||||
/// Retrieves a specific table from the schema by name, provided it exists.
|
||||
fn table(&self, name: &str) -> Result<Option<TableRef>>;
|
||||
async fn table(&self, name: &str) -> Result<Option<TableRef>>;
|
||||
|
||||
/// If supported by the implementation, adds a new table to this schema.
|
||||
/// If a table of the same name existed before, it returns "Table already exists" error.
|
||||
|
||||
@@ -219,7 +219,7 @@ fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<Strin
|
||||
let mut m = HashMap::with_capacity(3);
|
||||
m.insert(
|
||||
"entry_type".to_string(),
|
||||
Arc::new(UInt8Vector::from_slice(&[entry_type as u8])) as _,
|
||||
Arc::new(UInt8Vector::from_slice([entry_type as u8])) as _,
|
||||
);
|
||||
m.insert(
|
||||
"key".to_string(),
|
||||
@@ -228,7 +228,7 @@ fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<Strin
|
||||
// Timestamp in key part is intentionally left to 0
|
||||
m.insert(
|
||||
"timestamp".to_string(),
|
||||
Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice([0])) as _,
|
||||
);
|
||||
m
|
||||
}
|
||||
@@ -258,12 +258,12 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
|
||||
let now = util::current_time_millis();
|
||||
columns_values.insert(
|
||||
"gmt_created".to_string(),
|
||||
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
|
||||
);
|
||||
|
||||
columns_values.insert(
|
||||
"gmt_modified".to_string(),
|
||||
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
|
||||
);
|
||||
|
||||
InsertRequest {
|
||||
@@ -395,6 +395,7 @@ pub struct TableEntryValue {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_recordbatch::RecordBatches;
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use datatypes::value::Value;
|
||||
use log_store::NoopLogStore;
|
||||
use mito::config::EngineConfig;
|
||||
@@ -405,7 +406,6 @@ mod tests {
|
||||
use storage::EngineImpl;
|
||||
use table::metadata::TableType;
|
||||
use table::metadata::TableType::Base;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -480,7 +480,7 @@ mod tests {
|
||||
}
|
||||
|
||||
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
|
||||
let dir = TempDir::new("system-table-test").unwrap();
|
||||
let dir = create_temp_dir("system-table-test");
|
||||
let store_dir = dir.path().to_string_lossy();
|
||||
let accessor = object_store::services::Fs::default()
|
||||
.root(&store_dir)
|
||||
|
||||
178
src/catalog/src/table_source.rs
Normal file
178
src/catalog/src/table_source.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_catalog::format_full_table_name;
|
||||
use datafusion::common::{OwnedTableReference, ResolvedTableReference, TableReference};
|
||||
use datafusion::datasource::provider_as_source;
|
||||
use datafusion::logical_expr::TableSource;
|
||||
use session::context::QueryContext;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, QueryAccessDeniedSnafu, Result, SchemaNotFoundSnafu, TableNotExistSnafu,
|
||||
};
|
||||
use crate::CatalogListRef;
|
||||
|
||||
pub struct DfTableSourceProvider {
|
||||
catalog_list: CatalogListRef,
|
||||
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
|
||||
disallow_cross_schema_query: bool,
|
||||
default_catalog: String,
|
||||
default_schema: String,
|
||||
}
|
||||
|
||||
impl DfTableSourceProvider {
|
||||
pub fn new(
|
||||
catalog_list: CatalogListRef,
|
||||
disallow_cross_schema_query: bool,
|
||||
query_ctx: &QueryContext,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_list,
|
||||
disallow_cross_schema_query,
|
||||
resolved_tables: HashMap::new(),
|
||||
default_catalog: query_ctx.current_catalog(),
|
||||
default_schema: query_ctx.current_schema(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn resolve_table_ref<'a>(
|
||||
&'a self,
|
||||
table_ref: TableReference<'a>,
|
||||
) -> Result<ResolvedTableReference<'a>> {
|
||||
if self.disallow_cross_schema_query {
|
||||
match &table_ref {
|
||||
TableReference::Bare { .. } => (),
|
||||
TableReference::Partial { schema, .. } => {
|
||||
ensure!(
|
||||
schema.as_ref() == self.default_schema,
|
||||
QueryAccessDeniedSnafu {
|
||||
catalog: &self.default_catalog,
|
||||
schema: schema.as_ref(),
|
||||
}
|
||||
);
|
||||
}
|
||||
TableReference::Full {
|
||||
catalog, schema, ..
|
||||
} => {
|
||||
ensure!(
|
||||
catalog.as_ref() == self.default_catalog
|
||||
&& schema.as_ref() == self.default_schema,
|
||||
QueryAccessDeniedSnafu {
|
||||
catalog: catalog.as_ref(),
|
||||
schema: schema.as_ref()
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(table_ref.resolve(&self.default_catalog, &self.default_schema))
|
||||
}
|
||||
|
||||
pub async fn resolve_table(
|
||||
&mut self,
|
||||
table_ref: OwnedTableReference,
|
||||
) -> Result<Arc<dyn TableSource>> {
|
||||
let table_ref = table_ref.as_table_reference();
|
||||
let table_ref = self.resolve_table_ref(table_ref)?;
|
||||
|
||||
let resolved_name = table_ref.to_string();
|
||||
if let Some(table) = self.resolved_tables.get(&resolved_name) {
|
||||
return Ok(table.clone());
|
||||
}
|
||||
|
||||
let catalog_name = table_ref.catalog.as_ref();
|
||||
let schema_name = table_ref.schema.as_ref();
|
||||
let table_name = table_ref.table.as_ref();
|
||||
|
||||
let catalog = self
|
||||
.catalog_list
|
||||
.catalog(catalog_name)?
|
||||
.context(CatalogNotFoundSnafu { catalog_name })?;
|
||||
let schema = catalog.schema(schema_name)?.context(SchemaNotFoundSnafu {
|
||||
catalog: catalog_name,
|
||||
schema: schema_name,
|
||||
})?;
|
||||
let table = schema
|
||||
.table(table_name)
|
||||
.await?
|
||||
.with_context(|| TableNotExistSnafu {
|
||||
table: format_full_table_name(catalog_name, schema_name, table_name),
|
||||
})?;
|
||||
|
||||
let table = DfTableProviderAdapter::new(table);
|
||||
let table = provider_as_source(Arc::new(table));
|
||||
self.resolved_tables.insert(resolved_name, table.clone());
|
||||
Ok(table)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::*;
|
||||
use crate::local::MemoryCatalogManager;
|
||||
|
||||
#[test]
|
||||
fn test_validate_table_ref() {
|
||||
let query_ctx = &QueryContext::with("greptime", "public");
|
||||
|
||||
let table_provider =
|
||||
DfTableSourceProvider::new(Arc::new(MemoryCatalogManager::default()), true, query_ctx);
|
||||
|
||||
let table_ref = TableReference::Bare {
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Partial {
|
||||
schema: Cow::Borrowed("wrong_schema"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_err());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("greptime"),
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let table_ref = TableReference::Full {
|
||||
catalog: Cow::Borrowed("wrong_catalog"),
|
||||
schema: Cow::Borrowed("public"),
|
||||
table: Cow::Borrowed("table_name"),
|
||||
};
|
||||
let result = table_provider.resolve_table_ref(table_ref);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,7 @@ use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use async_stream::stream;
|
||||
use async_trait::async_trait;
|
||||
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::logical_plan::Expr;
|
||||
@@ -200,6 +201,7 @@ pub struct InformationSchema {
|
||||
pub system: Arc<SystemCatalogTable>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl SchemaProvider for InformationSchema {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -212,7 +214,7 @@ impl SchemaProvider for InformationSchema {
|
||||
])
|
||||
}
|
||||
|
||||
fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
|
||||
async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
|
||||
if name.eq_ignore_ascii_case("tables") {
|
||||
Ok(Some(self.tables.clone()))
|
||||
} else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
|
||||
|
||||
@@ -71,6 +71,7 @@ mod tests {
|
||||
|
||||
let registered_table = catalog_manager
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(registered_table.table_info().ident.table_id, table_id);
|
||||
@@ -158,6 +159,7 @@ mod tests {
|
||||
let table = guard.as_ref().unwrap();
|
||||
let table_registered = catalog_manager
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
|
||||
@@ -55,10 +55,18 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn catalog(&self) -> &String {
|
||||
&self.catalog
|
||||
}
|
||||
|
||||
pub fn set_catalog(&mut self, catalog: impl Into<String>) {
|
||||
self.catalog = catalog.into();
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> &String {
|
||||
&self.schema
|
||||
}
|
||||
|
||||
pub fn set_schema(&mut self, schema: impl Into<String>) {
|
||||
self.schema = schema.into();
|
||||
}
|
||||
@@ -118,7 +126,7 @@ impl Database {
|
||||
request: Some(request),
|
||||
};
|
||||
let request = Ticket {
|
||||
ticket: request.encode_to_vec(),
|
||||
ticket: request.encode_to_vec().into(),
|
||||
};
|
||||
|
||||
let mut client = self.client.make_client()?;
|
||||
|
||||
@@ -9,8 +9,12 @@ default-run = "greptime"
|
||||
name = "greptime"
|
||||
path = "src/bin/greptime.rs"
|
||||
|
||||
[features]
|
||||
mem-prof = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
catalog = { path = "../catalog" }
|
||||
clap = { version = "3.1", features = ["derive"] }
|
||||
client = { path = "../client" }
|
||||
common-base = { path = "../common/base" }
|
||||
@@ -27,17 +31,24 @@ futures.workspace = true
|
||||
meta-client = { path = "../meta-client" }
|
||||
meta-srv = { path = "../meta-srv" }
|
||||
nu-ansi-term = "0.46"
|
||||
partition = { path = "../partition" }
|
||||
query = { path = "../query" }
|
||||
rustyline = "10.1"
|
||||
serde.workspace = true
|
||||
servers = { path = "../servers" }
|
||||
session = { path = "../session" }
|
||||
snafu.workspace = true
|
||||
substrait = { path = "../common/substrait" }
|
||||
tikv-jemalloc-ctl = { version = "0.5", optional = true }
|
||||
tikv-jemallocator = { version = "0.5", optional = true }
|
||||
tokio.workspace = true
|
||||
toml = "0.5"
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util = { path = "../common/test-util" }
|
||||
rexpect = "0.5"
|
||||
serde.workspace = true
|
||||
tempdir = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
build-data = "0.1.3"
|
||||
|
||||
@@ -87,6 +87,10 @@ fn print_version() -> &'static str {
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(feature = "mem-prof")]
|
||||
#[global_allocator]
|
||||
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let cmd = Command::parse();
|
||||
|
||||
@@ -50,13 +50,15 @@ impl SubCommand {
|
||||
pub(crate) struct AttachCommand {
|
||||
#[clap(long)]
|
||||
pub(crate) grpc_addr: String,
|
||||
#[clap(long)]
|
||||
pub(crate) meta_addr: Option<String>,
|
||||
#[clap(long, action)]
|
||||
pub(crate) disable_helper: bool,
|
||||
}
|
||||
|
||||
impl AttachCommand {
|
||||
async fn run(self) -> Result<()> {
|
||||
let mut repl = Repl::try_new(&self)?;
|
||||
let mut repl = Repl::try_new(&self).await?;
|
||||
repl.run().await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,24 +13,39 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use catalog::remote::MetaKvBackend;
|
||||
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::prelude::ErrorExt;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use common_telemetry::logging;
|
||||
use either::Either;
|
||||
use frontend::catalog::FrontendCatalogManager;
|
||||
use frontend::datanode::DatanodeClients;
|
||||
use meta_client::client::MetaClientBuilder;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use partition::route::TableRoutes;
|
||||
use query::datafusion::DatafusionQueryEngine;
|
||||
use query::logical_optimizer::LogicalOptimizer;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::plan::LogicalPlan;
|
||||
use query::QueryEngine;
|
||||
use rustyline::error::ReadlineError;
|
||||
use rustyline::Editor;
|
||||
use session::context::QueryContext;
|
||||
use snafu::{ErrorCompat, ResultExt};
|
||||
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
|
||||
use crate::cli::cmd::ReplCommand;
|
||||
use crate::cli::helper::RustylineHelper;
|
||||
use crate::cli::AttachCommand;
|
||||
use crate::error::{
|
||||
CollectRecordBatchesSnafu, PrettyPrintRecordBatchesSnafu, ReadlineSnafu, ReplCreationSnafu,
|
||||
RequestDatabaseSnafu, Result,
|
||||
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
|
||||
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
|
||||
SubstraitEncodeLogicalPlanSnafu,
|
||||
};
|
||||
|
||||
/// Captures the state of the repl, gathers commands and executes them one by one
|
||||
@@ -43,6 +58,8 @@ pub(crate) struct Repl {
|
||||
|
||||
/// Client for interacting with GreptimeDB
|
||||
database: Database,
|
||||
|
||||
query_engine: Option<DatafusionQueryEngine>,
|
||||
}
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
@@ -51,7 +68,7 @@ impl Repl {
|
||||
println!("{}", ReplCommand::help())
|
||||
}
|
||||
|
||||
pub(crate) fn try_new(cmd: &AttachCommand) -> Result<Self> {
|
||||
pub(crate) async fn try_new(cmd: &AttachCommand) -> Result<Self> {
|
||||
let mut rl = Editor::new().context(ReplCreationSnafu)?;
|
||||
|
||||
if !cmd.disable_helper {
|
||||
@@ -69,10 +86,17 @@ impl Repl {
|
||||
let client = Client::with_urls([&cmd.grpc_addr]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
|
||||
let query_engine = if let Some(meta_addr) = &cmd.meta_addr {
|
||||
create_query_engine(meta_addr).await.map(Some)?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
rl,
|
||||
prompt: "> ".to_string(),
|
||||
database,
|
||||
query_engine,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -134,11 +158,29 @@ impl Repl {
|
||||
async fn do_execute_sql(&self, sql: String) -> Result<()> {
|
||||
let start = Instant::now();
|
||||
|
||||
let output = self
|
||||
.database
|
||||
.sql(&sql)
|
||||
.await
|
||||
.context(RequestDatabaseSnafu { sql: &sql })?;
|
||||
let output = if let Some(query_engine) = &self.query_engine {
|
||||
let stmt = QueryLanguageParser::parse_sql(&sql)
|
||||
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
|
||||
|
||||
let query_ctx = Arc::new(QueryContext::with(
|
||||
self.database.catalog(),
|
||||
self.database.schema(),
|
||||
));
|
||||
let LogicalPlan::DfPlan(plan) = query_engine
|
||||
.statement_to_plan(stmt, query_ctx)
|
||||
.await
|
||||
.and_then(|x| query_engine.optimize(&x))
|
||||
.context(PlanStatementSnafu)?;
|
||||
|
||||
let plan = DFLogicalSubstraitConvertor {}
|
||||
.encode(plan)
|
||||
.context(SubstraitEncodeLogicalPlanSnafu)?;
|
||||
|
||||
self.database.logical_plan(plan.to_vec()).await
|
||||
} else {
|
||||
self.database.sql(&sql).await
|
||||
}
|
||||
.context(RequestDatabaseSnafu { sql: &sql })?;
|
||||
|
||||
let either = match output {
|
||||
Output::Stream(s) => {
|
||||
@@ -197,3 +239,29 @@ fn history_file() -> PathBuf {
|
||||
buf.push(".greptimedb_cli_history");
|
||||
buf
|
||||
}
|
||||
|
||||
async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
|
||||
let mut meta_client = MetaClientBuilder::default().enable_store().build();
|
||||
meta_client
|
||||
.start([meta_addr])
|
||||
.await
|
||||
.context(StartMetaClientSnafu)?;
|
||||
let meta_client = Arc::new(meta_client);
|
||||
|
||||
let backend = Arc::new(MetaKvBackend {
|
||||
client: meta_client.clone(),
|
||||
});
|
||||
|
||||
let table_routes = Arc::new(TableRoutes::new(meta_client));
|
||||
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
|
||||
|
||||
let datanode_clients = Arc::new(DatanodeClients::default());
|
||||
|
||||
let catalog_list = Arc::new(FrontendCatalogManager::new(
|
||||
backend,
|
||||
partition_manager,
|
||||
datanode_clients,
|
||||
));
|
||||
|
||||
Ok(DatafusionQueryEngine::new(catalog_list, Default::default()))
|
||||
}
|
||||
|
||||
@@ -150,8 +150,10 @@ impl TryFrom<StartCommand> for DatanodeOptions {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::io::Write;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use datanode::datanode::{CompactionConfig, ObjectStoreConfig};
|
||||
use servers::Mode;
|
||||
|
||||
@@ -159,18 +161,57 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
enable_memory_catalog = false
|
||||
node_id = 42
|
||||
rpc_addr = "127.0.0.1:3001"
|
||||
rpc_hostname = "127.0.0.1"
|
||||
rpc_runtime_size = 8
|
||||
mysql_addr = "127.0.0.1:4406"
|
||||
mysql_runtime_size = 2
|
||||
|
||||
[meta_client_options]
|
||||
metasrv_addrs = ["127.0.0.1:3002"]
|
||||
timeout_millis = 3000
|
||||
connect_timeout_millis = 5000
|
||||
tcp_nodelay = true
|
||||
|
||||
[wal]
|
||||
dir = "/tmp/greptimedb/wal"
|
||||
file_size = "1GB"
|
||||
purge_threshold = "50GB"
|
||||
purge_interval = "10m"
|
||||
read_batch_size = 128
|
||||
sync_write = false
|
||||
|
||||
[storage]
|
||||
type = "File"
|
||||
data_dir = "/tmp/greptimedb/data/"
|
||||
|
||||
[compaction]
|
||||
max_inflight_tasks = 4
|
||||
max_files_in_level0 = 8
|
||||
max_purge_tasks = 32
|
||||
"#;
|
||||
write!(file, "{}", toml_str).unwrap();
|
||||
|
||||
let cmd = StartCommand {
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
config_file: Some(file.path().to_str().unwrap().to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
let options: DatanodeOptions = cmd.try_into().unwrap();
|
||||
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
|
||||
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal.dir);
|
||||
assert_eq!("127.0.0.1:4406".to_string(), options.mysql_addr);
|
||||
assert_eq!(4, options.mysql_runtime_size);
|
||||
assert_eq!(2, options.mysql_runtime_size);
|
||||
assert_eq!(Some(42), options.node_id);
|
||||
|
||||
assert_eq!(Duration::from_secs(600), options.wal.purge_interval);
|
||||
assert_eq!(1024 * 1024 * 1024, options.wal.file_size.0);
|
||||
assert_eq!(1024 * 1024 * 1024 * 50, options.wal.purge_threshold.0);
|
||||
assert!(!options.wal.sync_write);
|
||||
|
||||
let MetaClientOptions {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout_millis,
|
||||
@@ -181,7 +222,7 @@ mod tests {
|
||||
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
|
||||
assert_eq!(5000, connect_timeout_millis);
|
||||
assert_eq!(3000, timeout_millis);
|
||||
assert!(!tcp_nodelay);
|
||||
assert!(tcp_nodelay);
|
||||
|
||||
match options.storage {
|
||||
ObjectStoreConfig::File(FileConfig { data_dir }) => {
|
||||
@@ -194,7 +235,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
CompactionConfig {
|
||||
max_inflight_tasks: 4,
|
||||
max_files_in_level0: 16,
|
||||
max_files_in_level0: 8,
|
||||
max_purge_tasks: 32,
|
||||
},
|
||||
options.compaction
|
||||
@@ -232,32 +273,4 @@ mod tests {
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_config() {
|
||||
let dn_opts = DatanodeOptions::try_from(StartCommand {
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!("/tmp/greptimedb/wal", dn_opts.wal.dir);
|
||||
assert_eq!(Duration::from_secs(600), dn_opts.wal.purge_interval);
|
||||
assert_eq!(1024 * 1024 * 1024, dn_opts.wal.file_size.0);
|
||||
assert_eq!(1024 * 1024 * 1024 * 50, dn_opts.wal.purge_threshold.0);
|
||||
assert!(!dn_opts.wal.sync_write);
|
||||
assert_eq!(Some(42), dn_opts.node_id);
|
||||
let MetaClientOptions {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout_millis,
|
||||
connect_timeout_millis,
|
||||
tcp_nodelay,
|
||||
} = dn_opts.meta_client_options.unwrap();
|
||||
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
|
||||
assert_eq!(3000, timeout_millis);
|
||||
assert_eq!(5000, connect_timeout_millis);
|
||||
assert!(!tcp_nodelay);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,6 +103,31 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start Meta client, source: {}", source))]
|
||||
StartMetaClient {
|
||||
#[snafu(backtrace)]
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse SQL: {}, source: {}", sql, source))]
|
||||
ParseSql {
|
||||
sql: String,
|
||||
#[snafu(backtrace)]
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to plan statement, source: {}", source))]
|
||||
PlanStatement {
|
||||
#[snafu(backtrace)]
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to encode logical plan in substrait, source: {}", source))]
|
||||
SubstraitEncodeLogicalPlan {
|
||||
#[snafu(backtrace)]
|
||||
source: substrait::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -126,6 +151,11 @@ impl ErrorExt for Error {
|
||||
Error::CollectRecordBatches { source } | Error::PrettyPrintRecordBatches { source } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::StartMetaClient { source } => source.status_code(),
|
||||
Error::ParseSql { source, .. } | Error::PlanStatement { source } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::SubstraitEncodeLogicalPlan { source } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ use frontend::instance::Instance;
|
||||
use frontend::mysql::MysqlOptions;
|
||||
use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use frontend::prom::PromOptions;
|
||||
use meta_client::MetaClientOptions;
|
||||
use servers::auth::UserProviderRef;
|
||||
use servers::http::HttpOptions;
|
||||
@@ -67,6 +68,8 @@ pub struct StartCommand {
|
||||
#[clap(long)]
|
||||
mysql_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
prom_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
postgres_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
opentsdb_addr: Option<String>,
|
||||
@@ -141,6 +144,9 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
if let Some(addr) = cmd.prom_addr {
|
||||
opts.prom_options = Some(PromOptions { addr });
|
||||
}
|
||||
if let Some(addr) = cmd.postgres_addr {
|
||||
opts.postgres_options = Some(PostgresOptions {
|
||||
addr,
|
||||
@@ -173,8 +179,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::Write;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use servers::auth::{Identity, Password, UserProviderRef};
|
||||
|
||||
use super::*;
|
||||
@@ -184,6 +192,7 @@ mod tests {
|
||||
let command = StartCommand {
|
||||
http_addr: Some("127.0.0.1:1234".to_string()),
|
||||
grpc_addr: None,
|
||||
prom_addr: Some("127.0.0.1:4444".to_string()),
|
||||
mysql_addr: Some("127.0.0.1:5678".to_string()),
|
||||
postgres_addr: Some("127.0.0.1:5432".to_string()),
|
||||
opentsdb_addr: Some("127.0.0.1:4321".to_string()),
|
||||
@@ -207,6 +216,7 @@ mod tests {
|
||||
opts.opentsdb_options.as_ref().unwrap().addr,
|
||||
"127.0.0.1:4321"
|
||||
);
|
||||
assert_eq!(opts.prom_options.as_ref().unwrap().addr, "127.0.0.1:4444");
|
||||
|
||||
let default_opts = FrontendOptions::default();
|
||||
assert_eq!(
|
||||
@@ -231,17 +241,25 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
mode = "distributed"
|
||||
|
||||
[http_options]
|
||||
addr = "127.0.0.1:4000"
|
||||
timeout = "30s"
|
||||
"#;
|
||||
write!(file, "{}", toml_str).unwrap();
|
||||
|
||||
let command = StartCommand {
|
||||
http_addr: None,
|
||||
grpc_addr: None,
|
||||
mysql_addr: None,
|
||||
prom_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
influxdb_enable: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/frontend.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
config_file: Some(file.path().to_str().unwrap().to_string()),
|
||||
metasrv_addr: None,
|
||||
tls_mode: None,
|
||||
tls_cert_path: None,
|
||||
@@ -267,6 +285,7 @@ mod tests {
|
||||
http_addr: None,
|
||||
grpc_addr: None,
|
||||
mysql_addr: None,
|
||||
prom_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
influxdb_enable: None,
|
||||
|
||||
@@ -113,6 +113,9 @@ impl TryFrom<StartCommand> for MetaSrvOptions {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::Write;
|
||||
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use meta_srv::selector::SelectorType;
|
||||
|
||||
use super::*;
|
||||
@@ -136,15 +139,23 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let mut file = create_named_temp_file();
|
||||
let toml_str = r#"
|
||||
bind_addr = "127.0.0.1:3002"
|
||||
server_addr = "127.0.0.1:3002"
|
||||
store_addr = "127.0.0.1:2379"
|
||||
datanode_lease_secs = 15
|
||||
selector = "LeaseBased"
|
||||
use_memory_store = false
|
||||
"#;
|
||||
write!(file, "{}", toml_str).unwrap();
|
||||
|
||||
let cmd = StartCommand {
|
||||
bind_addr: None,
|
||||
server_addr: None,
|
||||
store_addr: None,
|
||||
selector: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/metasrv.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
config_file: Some(file.path().to_str().unwrap().to_string()),
|
||||
use_memory_store: false,
|
||||
};
|
||||
let options: MetaSrvOptions = cmd.try_into().unwrap();
|
||||
|
||||
@@ -28,8 +28,8 @@ use frontend::instance::Instance as FeInstance;
|
||||
use frontend::mysql::MysqlOptions;
|
||||
use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use frontend::prom::PromOptions;
|
||||
use frontend::prometheus::PrometheusOptions;
|
||||
use frontend::promql::PromqlOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::http::HttpOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
@@ -77,7 +77,7 @@ pub struct StandaloneOptions {
|
||||
pub opentsdb_options: Option<OpentsdbOptions>,
|
||||
pub influxdb_options: Option<InfluxdbOptions>,
|
||||
pub prometheus_options: Option<PrometheusOptions>,
|
||||
pub promql_options: Option<PromqlOptions>,
|
||||
pub prom_options: Option<PromOptions>,
|
||||
pub wal: WalConfig,
|
||||
pub storage: ObjectStoreConfig,
|
||||
pub compaction: CompactionConfig,
|
||||
@@ -96,7 +96,7 @@ impl Default for StandaloneOptions {
|
||||
opentsdb_options: Some(OpentsdbOptions::default()),
|
||||
influxdb_options: Some(InfluxdbOptions::default()),
|
||||
prometheus_options: Some(PrometheusOptions::default()),
|
||||
promql_options: Some(PromqlOptions::default()),
|
||||
prom_options: Some(PromOptions::default()),
|
||||
wal: WalConfig::default(),
|
||||
storage: ObjectStoreConfig::default(),
|
||||
compaction: CompactionConfig::default(),
|
||||
@@ -116,7 +116,7 @@ impl StandaloneOptions {
|
||||
opentsdb_options: self.opentsdb_options,
|
||||
influxdb_options: self.influxdb_options,
|
||||
prometheus_options: self.prometheus_options,
|
||||
promql_options: self.promql_options,
|
||||
prom_options: self.prom_options,
|
||||
meta_client_options: None,
|
||||
}
|
||||
}
|
||||
@@ -142,6 +142,8 @@ struct StartCommand {
|
||||
#[clap(long)]
|
||||
mysql_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
prom_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
postgres_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
opentsdb_addr: Option<String>,
|
||||
@@ -254,6 +256,11 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
if let Some(addr) = cmd.prom_addr {
|
||||
opts.prom_options = Some(PromOptions { addr })
|
||||
}
|
||||
|
||||
if let Some(addr) = cmd.postgres_addr {
|
||||
opts.postgres_options = Some(PostgresOptions {
|
||||
addr,
|
||||
@@ -302,6 +309,7 @@ mod tests {
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
prom_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
config_file: Some(format!(
|
||||
@@ -347,6 +355,7 @@ mod tests {
|
||||
let command = StartCommand {
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
prom_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
|
||||
@@ -29,9 +29,9 @@ mod tests {
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::ResultExt;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Result;
|
||||
@@ -62,7 +62,7 @@ mod tests {
|
||||
host: "greptime.test".to_string(),
|
||||
};
|
||||
|
||||
let dir = TempDir::new("test_from_file").unwrap();
|
||||
let dir = create_temp_dir("test_from_file");
|
||||
let test_file = format!("{}/test.toml", dir.path().to_str().unwrap());
|
||||
|
||||
let s = toml::to_string(&config).unwrap();
|
||||
|
||||
@@ -18,8 +18,8 @@ mod tests {
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use rexpect::session::PtyReplSession;
|
||||
use tempdir::TempDir;
|
||||
|
||||
struct Repl {
|
||||
repl: PtyReplSession,
|
||||
@@ -48,8 +48,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_repl() {
|
||||
let data_dir = TempDir::new_in("/tmp", "data").unwrap();
|
||||
let wal_dir = TempDir::new_in("/tmp", "wal").unwrap();
|
||||
let data_dir = create_temp_dir("data");
|
||||
let wal_dir = create_temp_dir("wal");
|
||||
|
||||
let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
bin_path.push("../../target/debug");
|
||||
|
||||
@@ -20,6 +20,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
|
||||
pub struct Bytes(bytes::Bytes);
|
||||
|
||||
impl From<Bytes> for bytes::Bytes {
|
||||
fn from(value: Bytes) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bytes::Bytes> for Bytes {
|
||||
fn from(bytes: bytes::Bytes) -> Bytes {
|
||||
Bytes(bytes)
|
||||
|
||||
@@ -17,5 +17,4 @@ snafu = { version = "0.7", features = ["backtraces"] }
|
||||
|
||||
[dev-dependencies]
|
||||
chrono.workspace = true
|
||||
tempdir = "0.3"
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -10,6 +10,7 @@ proc-macro = true
|
||||
[dependencies]
|
||||
quote = "1.0"
|
||||
syn = "1.0"
|
||||
proc-macro2 = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
arc-swap = "1.0"
|
||||
|
||||
@@ -12,8 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod range_fn;
|
||||
|
||||
use proc_macro::TokenStream;
|
||||
use quote::{quote, quote_spanned};
|
||||
use range_fn::process_range_fn;
|
||||
use syn::parse::Parser;
|
||||
use syn::spanned::Spanned;
|
||||
use syn::{parse_macro_input, DeriveInput, ItemStruct};
|
||||
@@ -83,3 +86,31 @@ pub fn as_aggr_func_creator(_args: TokenStream, input: TokenStream) -> TokenStre
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
/// Attribute macro to convert an arithimetic function to a range function. The annotated function
|
||||
/// should accept servaral arrays as input and return a single value as output. This procedure
|
||||
/// macro can works on any number of input parameters. Return type can be either primitive type
|
||||
/// or wrapped in `Option`.
|
||||
///
|
||||
/// # Example
|
||||
/// Take `count_over_time()` in PromQL as an example:
|
||||
/// ```rust, ignore
|
||||
/// /// The count of all values in the specified interval.
|
||||
/// #[range_fn(
|
||||
/// name = "CountOverTime",
|
||||
/// ret = "Float64Array",
|
||||
/// display_name = "prom_count_over_time"
|
||||
/// )]
|
||||
/// pub fn count_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> f64 {
|
||||
/// values.len() as f64
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Arguments
|
||||
/// - `name`: The name of the generated [ScalarUDF] struct.
|
||||
/// - `ret`: The return type of the generated UDF function.
|
||||
/// - `display_name`: The display name of the generated UDF function.
|
||||
#[proc_macro_attribute]
|
||||
pub fn range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
|
||||
process_range_fn(args, input)
|
||||
}
|
||||
|
||||
230
src/common/function-macro/src/range_fn.rs
Normal file
230
src/common/function-macro/src/range_fn.rs
Normal file
@@ -0,0 +1,230 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use proc_macro::TokenStream;
|
||||
use proc_macro2::Span;
|
||||
use quote::quote;
|
||||
use syn::punctuated::Punctuated;
|
||||
use syn::spanned::Spanned;
|
||||
use syn::token::Comma;
|
||||
use syn::{
|
||||
parse_macro_input, Attribute, AttributeArgs, FnArg, Ident, ItemFn, Meta, MetaNameValue,
|
||||
NestedMeta, Signature, Type, TypeReference, Visibility,
|
||||
};
|
||||
|
||||
/// Internal util macro to early return on error.
|
||||
macro_rules! ok {
|
||||
($item:expr) => {
|
||||
match $item {
|
||||
Ok(item) => item,
|
||||
Err(e) => return e.into_compile_error().into(),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
|
||||
// extract arg map
|
||||
let arg_pairs = parse_macro_input!(args as AttributeArgs);
|
||||
let arg_span = arg_pairs[0].span();
|
||||
let arg_map = ok!(extract_arg_map(arg_pairs));
|
||||
|
||||
// decompose the fn block
|
||||
let compute_fn = parse_macro_input!(input as ItemFn);
|
||||
let ItemFn {
|
||||
attrs,
|
||||
vis,
|
||||
sig,
|
||||
block,
|
||||
} = compute_fn;
|
||||
|
||||
// extract fn arg list
|
||||
let Signature {
|
||||
inputs,
|
||||
ident: fn_name,
|
||||
..
|
||||
} = &sig;
|
||||
let arg_types = ok!(extract_input_types(inputs));
|
||||
|
||||
// build the struct and its impl block
|
||||
let struct_code = build_struct(
|
||||
attrs,
|
||||
vis,
|
||||
ok!(get_ident(&arg_map, "name", arg_span)),
|
||||
ok!(get_ident(&arg_map, "display_name", arg_span)),
|
||||
);
|
||||
let calc_fn_code = build_calc_fn(
|
||||
ok!(get_ident(&arg_map, "name", arg_span)),
|
||||
arg_types,
|
||||
fn_name.clone(),
|
||||
ok!(get_ident(&arg_map, "ret", arg_span)),
|
||||
);
|
||||
// preserve this fn, but remove its `pub` modifier
|
||||
let input_fn_code: TokenStream = quote! {
|
||||
#sig { #block }
|
||||
}
|
||||
.into();
|
||||
|
||||
let mut result = TokenStream::new();
|
||||
result.extend(struct_code);
|
||||
result.extend(calc_fn_code);
|
||||
result.extend(input_fn_code);
|
||||
result
|
||||
}
|
||||
|
||||
/// Extract a String <-> Ident map from the attribute args.
|
||||
fn extract_arg_map(args: Vec<NestedMeta>) -> Result<HashMap<String, Ident>, syn::Error> {
|
||||
args.into_iter()
|
||||
.map(|meta| {
|
||||
if let NestedMeta::Meta(Meta::NameValue(MetaNameValue { path, lit, .. })) = meta {
|
||||
let name = path.get_ident().unwrap().to_string();
|
||||
let ident = match lit {
|
||||
syn::Lit::Str(lit_str) => lit_str.parse::<Ident>(),
|
||||
_ => Err(syn::Error::new(
|
||||
lit.span(),
|
||||
"Unexpected attribute format. Expected `name = \"value\"`",
|
||||
)),
|
||||
}?;
|
||||
Ok((name, ident))
|
||||
} else {
|
||||
Err(syn::Error::new(
|
||||
meta.span(),
|
||||
"Unexpected attribute format. Expected `name = \"value\"`",
|
||||
))
|
||||
}
|
||||
})
|
||||
.collect::<Result<HashMap<String, Ident>, syn::Error>>()
|
||||
}
|
||||
|
||||
/// Helper function to get an Ident from the previous arg map.
|
||||
fn get_ident(map: &HashMap<String, Ident>, key: &str, span: Span) -> Result<Ident, syn::Error> {
|
||||
map.get(key)
|
||||
.cloned()
|
||||
.ok_or_else(|| syn::Error::new(span, format!("Expect attribute {key} but not found")))
|
||||
}
|
||||
|
||||
/// Extract the argument list from the annotated function.
|
||||
fn extract_input_types(inputs: &Punctuated<FnArg, Comma>) -> Result<Vec<Type>, syn::Error> {
|
||||
inputs
|
||||
.iter()
|
||||
.map(|arg| match arg {
|
||||
FnArg::Receiver(receiver) => Err(syn::Error::new(receiver.span(), "expected bool")),
|
||||
FnArg::Typed(pat_type) => Ok(*pat_type.ty.clone()),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_struct(
|
||||
attrs: Vec<Attribute>,
|
||||
vis: Visibility,
|
||||
name: Ident,
|
||||
display_name_ident: Ident,
|
||||
) -> TokenStream {
|
||||
let display_name = display_name_ident.to_string();
|
||||
quote! {
|
||||
#(#attrs)*
|
||||
#[derive(Debug)]
|
||||
#vis struct #name {}
|
||||
|
||||
impl #name {
|
||||
pub const fn name() -> &'static str {
|
||||
#display_name
|
||||
}
|
||||
|
||||
pub fn scalar_udf() -> ScalarUDF {
|
||||
ScalarUDF {
|
||||
name: Self::name().to_string(),
|
||||
signature: Signature::new(
|
||||
TypeSignature::Exact(Self::input_type()),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
|
||||
fun: Arc::new(Self::calc),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(ruihang): this should be parameterized
|
||||
// time index column and value column
|
||||
fn input_type() -> Vec<DataType> {
|
||||
vec![
|
||||
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
|
||||
RangeArray::convert_data_type(DataType::Float64),
|
||||
]
|
||||
}
|
||||
|
||||
// TODO(ruihang): this should be parameterized
|
||||
fn return_type() -> DataType {
|
||||
DataType::Float64
|
||||
}
|
||||
}
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
fn build_calc_fn(
|
||||
name: Ident,
|
||||
param_types: Vec<Type>,
|
||||
fn_name: Ident,
|
||||
ret_type: Ident,
|
||||
) -> TokenStream {
|
||||
let param_names = param_types
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, ty)| Ident::new(&format!("param_{}", i), ty.span()))
|
||||
.collect::<Vec<_>>();
|
||||
let unref_param_types = param_types
|
||||
.iter()
|
||||
.map(|ty| {
|
||||
if let Type::Reference(TypeReference { elem, .. }) = ty {
|
||||
elem.as_ref().clone()
|
||||
} else {
|
||||
ty.clone()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let num_params = param_types.len();
|
||||
let param_numbers = (0..num_params).collect::<Vec<_>>();
|
||||
let range_array_names = param_names
|
||||
.iter()
|
||||
.map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
|
||||
.collect::<Vec<_>>();
|
||||
let first_range_array_name = range_array_names.first().unwrap().clone();
|
||||
|
||||
quote! {
|
||||
impl #name {
|
||||
fn calc(input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
|
||||
assert_eq!(input.len(), #num_params);
|
||||
|
||||
#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.data().clone().into())?; )*
|
||||
|
||||
// TODO(ruihang): add ensure!()
|
||||
|
||||
let mut result_array = Vec::new();
|
||||
for index in 0..#first_range_array_name.len(){
|
||||
#( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*
|
||||
|
||||
// TODO(ruihang): add ensure!() to check length
|
||||
|
||||
let result = #fn_name(#( &#param_names, )*);
|
||||
result_array.push(result);
|
||||
}
|
||||
|
||||
let result = ColumnarValue::Array(Arc::new(#ret_type::from_iter(result_array)));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
.into()
|
||||
}
|
||||
@@ -14,9 +14,9 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::{ExecuteFunctionSnafu, FromScalarValueSnafu};
|
||||
use common_query::error::FromScalarValueSnafu;
|
||||
use common_query::prelude::{
|
||||
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
|
||||
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf,
|
||||
};
|
||||
use datatypes::error::Error as DataTypeError;
|
||||
use datatypes::prelude::*;
|
||||
@@ -54,16 +54,8 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
|
||||
.collect();
|
||||
|
||||
let result = func_cloned.eval(func_ctx, &args.context(FromScalarValueSnafu)?);
|
||||
|
||||
let udf = if len.is_some() {
|
||||
result.map(ColumnarValue::Vector)?
|
||||
} else {
|
||||
ScalarValue::try_from_array(&result?.to_arrow_array(), 0)
|
||||
.map(ColumnarValue::Scalar)
|
||||
.context(ExecuteFunctionSnafu)?
|
||||
};
|
||||
|
||||
Ok(udf)
|
||||
let udf_result = result.map(ColumnarValue::Vector)?;
|
||||
Ok(udf_result)
|
||||
});
|
||||
|
||||
ScalarUdf::new(func.name(), &func.signature(), &return_type, &fun)
|
||||
|
||||
@@ -26,6 +26,7 @@ use common_time::{Date, DateTime};
|
||||
use datatypes::data_type::{ConcreteDataType, DataType};
|
||||
use datatypes::prelude::{ValueRef, VectorRef};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::MutableVector;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -414,11 +415,26 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
|
||||
.into_iter()
|
||||
.map(|v| Value::Date(v.into()))
|
||||
.collect(),
|
||||
ConcreteDataType::Timestamp(_) => values
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(_)) => values
|
||||
.ts_second_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Timestamp(Timestamp::new_second(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => values
|
||||
.ts_millisecond_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => values
|
||||
.ts_microsecond_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Timestamp(Timestamp::new_microsecond(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => values
|
||||
.ts_nanosecond_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Timestamp(Timestamp::new_nanosecond(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
|
||||
unreachable!()
|
||||
}
|
||||
@@ -444,6 +460,7 @@ mod tests {
|
||||
use common_time::timestamp::Timestamp;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
|
||||
use datatypes::types::{TimestampMillisecondType, TimestampSecondType, TimestampType};
|
||||
use datatypes::value::Value;
|
||||
use snafu::ResultExt;
|
||||
use table::error::Result as TableResult;
|
||||
@@ -647,6 +664,39 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_timestamp_values() {
|
||||
// second
|
||||
let actual = convert_values(
|
||||
&ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType)),
|
||||
Values {
|
||||
ts_second_values: vec![1_i64, 2_i64, 3_i64],
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let expect = vec![
|
||||
Value::Timestamp(Timestamp::new_second(1_i64)),
|
||||
Value::Timestamp(Timestamp::new_second(2_i64)),
|
||||
Value::Timestamp(Timestamp::new_second(3_i64)),
|
||||
];
|
||||
assert_eq!(expect, actual);
|
||||
|
||||
// millisecond
|
||||
let actual = convert_values(
|
||||
&ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType)),
|
||||
Values {
|
||||
ts_millisecond_values: vec![1_i64, 2_i64, 3_i64],
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let expect = vec![
|
||||
Value::Timestamp(Timestamp::new_millisecond(1_i64)),
|
||||
Value::Timestamp(Timestamp::new_millisecond(2_i64)),
|
||||
Value::Timestamp(Timestamp::new_millisecond(3_i64)),
|
||||
];
|
||||
assert_eq!(expect, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_null() {
|
||||
let null_mask = BitVec::from_slice(&[0b0000_0001, 0b0000_1000]);
|
||||
|
||||
@@ -16,7 +16,7 @@ common-runtime = { path = "../runtime" }
|
||||
dashmap = "5.4"
|
||||
datafusion.workspace = true
|
||||
datatypes = { path = "../../datatypes" }
|
||||
flatbuffers = "22"
|
||||
flatbuffers = "23.1"
|
||||
futures = "0.3"
|
||||
prost.workspace = true
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
|
||||
@@ -16,8 +16,9 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::{AffectedRows, FlightMetadata};
|
||||
use arrow_flight::utils::{flight_data_from_arrow_batch, flight_data_to_arrow_batch};
|
||||
use arrow_flight::utils::flight_data_to_arrow_batch;
|
||||
use arrow_flight::{FlightData, IpcMessage, SchemaAsIpc};
|
||||
use common_base::bytes::Bytes;
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::datatypes::Schema as ArrowSchema;
|
||||
@@ -39,38 +40,58 @@ pub enum FlightMessage {
|
||||
AffectedRows(usize),
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct FlightEncoder {
|
||||
write_options: writer::IpcWriteOptions,
|
||||
data_gen: writer::IpcDataGenerator,
|
||||
dictionary_tracker: writer::DictionaryTracker,
|
||||
}
|
||||
|
||||
impl Default for FlightEncoder {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
write_options: writer::IpcWriteOptions::default(),
|
||||
data_gen: writer::IpcDataGenerator::default(),
|
||||
dictionary_tracker: writer::DictionaryTracker::new(false),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FlightEncoder {
|
||||
pub fn encode(&self, flight_message: FlightMessage) -> FlightData {
|
||||
pub fn encode(&mut self, flight_message: FlightMessage) -> FlightData {
|
||||
match flight_message {
|
||||
FlightMessage::Schema(schema) => {
|
||||
SchemaAsIpc::new(schema.arrow_schema(), &self.write_options).into()
|
||||
}
|
||||
FlightMessage::Recordbatch(recordbatch) => {
|
||||
let (flight_dictionaries, flight_batch) = flight_data_from_arrow_batch(
|
||||
recordbatch.df_record_batch(),
|
||||
&self.write_options,
|
||||
);
|
||||
let (encoded_dictionaries, encoded_batch) = self
|
||||
.data_gen
|
||||
.encoded_batch(
|
||||
recordbatch.df_record_batch(),
|
||||
&mut self.dictionary_tracker,
|
||||
&self.write_options,
|
||||
)
|
||||
.expect("DictionaryTracker configured above to not fail on replacement");
|
||||
|
||||
// TODO(LFC): Handle dictionary as FlightData here, when we supported Arrow's Dictionary DataType.
|
||||
// Currently we don't have a datatype corresponding to Arrow's Dictionary DataType,
|
||||
// so there won't be any "dictionaries" here. Assert to be sure about it, and
|
||||
// perform a "testing guard" in case we forgot to handle the possible "dictionaries"
|
||||
// here in the future.
|
||||
debug_assert_eq!(flight_dictionaries.len(), 0);
|
||||
debug_assert_eq!(encoded_dictionaries.len(), 0);
|
||||
|
||||
flight_batch
|
||||
encoded_batch.into()
|
||||
}
|
||||
FlightMessage::AffectedRows(rows) => {
|
||||
let metadata = FlightMetadata {
|
||||
affected_rows: Some(AffectedRows { value: rows as _ }),
|
||||
}
|
||||
.encode_to_vec();
|
||||
FlightData::new(None, IpcMessage(build_none_flight_msg()), metadata, vec![])
|
||||
FlightData::new(
|
||||
None,
|
||||
IpcMessage(build_none_flight_msg().into()),
|
||||
metadata,
|
||||
vec![],
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -83,7 +104,8 @@ pub struct FlightDecoder {
|
||||
|
||||
impl FlightDecoder {
|
||||
pub fn try_decode(&mut self, flight_data: FlightData) -> Result<FlightMessage> {
|
||||
let message = root_as_message(flight_data.data_header.as_slice()).map_err(|e| {
|
||||
let bytes = flight_data.data_header.slice(..);
|
||||
let message = root_as_message(&bytes).map_err(|e| {
|
||||
InvalidFlightDataSnafu {
|
||||
reason: e.to_string(),
|
||||
}
|
||||
@@ -91,7 +113,7 @@ impl FlightDecoder {
|
||||
})?;
|
||||
match message.header_type() {
|
||||
MessageHeader::NONE => {
|
||||
let metadata = FlightMetadata::decode(flight_data.app_metadata.as_slice())
|
||||
let metadata = FlightMetadata::decode(flight_data.app_metadata)
|
||||
.context(DecodeFlightDataSnafu)?;
|
||||
if let Some(AffectedRows { value }) = metadata.affected_rows {
|
||||
return Ok(FlightMessage::AffectedRows(value as _));
|
||||
@@ -176,7 +198,7 @@ pub fn flight_messages_to_recordbatches(messages: Vec<FlightMessage>) -> Result<
|
||||
}
|
||||
}
|
||||
|
||||
fn build_none_flight_msg() -> Vec<u8> {
|
||||
fn build_none_flight_msg() -> Bytes {
|
||||
let mut builder = FlatBufferBuilder::new();
|
||||
|
||||
let mut message = arrow::ipc::MessageBuilder::new(&mut builder);
|
||||
@@ -187,7 +209,7 @@ fn build_none_flight_msg() -> Vec<u8> {
|
||||
let data = message.finish();
|
||||
builder.finish(data, None);
|
||||
|
||||
builder.finished_data().to_vec()
|
||||
builder.finished_data().into()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
20
src/common/mem-prof/Cargo.toml
Normal file
20
src/common/mem-prof/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "common-mem-prof"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
common-error = { path = "../error" }
|
||||
snafu.workspace = true
|
||||
tempfile = "3.4"
|
||||
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
|
||||
tikv-jemallocator = "0.5"
|
||||
tokio.workspace = true
|
||||
|
||||
[dependencies.tikv-jemalloc-sys]
|
||||
version = "0.5"
|
||||
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
50
src/common/mem-prof/README.md
Normal file
50
src/common/mem-prof/README.md
Normal file
@@ -0,0 +1,50 @@
|
||||
# Profile memory usage of GreptimeDB
|
||||
|
||||
This crate provides an easy approach to dump memory profiling info.
|
||||
|
||||
## Prerequisites
|
||||
### jemalloc
|
||||
```bash
|
||||
# for macOS
|
||||
brew install jemalloc
|
||||
|
||||
# for Ubuntu
|
||||
sudo apt install libjemalloc-dev
|
||||
```
|
||||
|
||||
### [flamegraph](https://github.com/brendangregg/FlameGraph)
|
||||
|
||||
```bash
|
||||
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
|
||||
```
|
||||
|
||||
### Build GreptimeDB with `mem-prof` feature.
|
||||
|
||||
```bash
|
||||
cargo build --features=mem-prof
|
||||
```
|
||||
|
||||
## Profiling
|
||||
|
||||
Start GreptimeDB instance with environment variables:
|
||||
|
||||
```bash
|
||||
MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone start
|
||||
```
|
||||
|
||||
Dump memory profiling data through HTTP API:
|
||||
|
||||
```bash
|
||||
curl localhost:4000/v1/prof/mem > greptime.hprof
|
||||
```
|
||||
|
||||
You can periodically dump profiling data and compare them to find the delta memory usage.
|
||||
|
||||
## Analyze profiling data with flamegraph
|
||||
|
||||
To create flamegraph according to dumped profiling data:
|
||||
|
||||
```bash
|
||||
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
|
||||
```
|
||||
|
||||
66
src/common/mem-prof/src/error.rs
Normal file
66
src/common/mem-prof/src/error.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use common_error::prelude::{ErrorExt, StatusCode};
|
||||
use snafu::{Backtrace, Snafu};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to read OPT_PROF"))]
|
||||
ReadOptProf { source: tikv_jemalloc_ctl::Error },
|
||||
|
||||
#[snafu(display("Memory profiling is not enabled"))]
|
||||
ProfilingNotEnabled,
|
||||
|
||||
#[snafu(display("Failed to build temp file from given path: {:?}", path))]
|
||||
BuildTempPath { path: PathBuf, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Failed to open temp file: {}", path))]
|
||||
OpenTempFile {
|
||||
path: String,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to dump profiling data to temp file: {:?}", path))]
|
||||
DumpProfileData {
|
||||
path: PathBuf,
|
||||
source: tikv_jemalloc_ctl::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::ReadOptProf { .. } => StatusCode::Internal,
|
||||
Error::ProfilingNotEnabled => StatusCode::InvalidArguments,
|
||||
Error::BuildTempPath { .. } => StatusCode::Internal,
|
||||
Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
|
||||
Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
|
||||
}
|
||||
}
|
||||
|
||||
fn backtrace_opt(&self) -> Option<&Backtrace> {
|
||||
snafu::ErrorCompat::backtrace(self)
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
74
src/common/mem-prof/src/lib.rs
Normal file
74
src/common/mem-prof/src/lib.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod error;
|
||||
|
||||
use std::ffi::{c_char, CString};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
use crate::error::{
|
||||
BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
|
||||
ReadOptProfSnafu,
|
||||
};
|
||||
|
||||
const PROF_DUMP: &[u8] = b"prof.dump\0";
|
||||
const OPT_PROF: &[u8] = b"opt.prof\0";
|
||||
|
||||
pub async fn dump_profile() -> error::Result<Vec<u8>> {
|
||||
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
|
||||
let tmp_path = tempfile::tempdir().map_err(|_| {
|
||||
BuildTempPathSnafu {
|
||||
path: std::env::temp_dir(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let mut path_buf = PathBuf::from(tmp_path.path());
|
||||
path_buf.push("greptimedb.hprof");
|
||||
|
||||
let path = path_buf
|
||||
.to_str()
|
||||
.ok_or_else(|| BuildTempPathSnafu { path: &path_buf }.build())?
|
||||
.to_string();
|
||||
|
||||
let mut bytes = CString::new(path.as_str())
|
||||
.map_err(|_| BuildTempPathSnafu { path: &path_buf }.build())?
|
||||
.into_bytes_with_nul();
|
||||
|
||||
{
|
||||
// #safety: we always expect a valid temp file path to write profiling data to.
|
||||
let ptr = bytes.as_mut_ptr() as *mut c_char;
|
||||
unsafe {
|
||||
tikv_jemalloc_ctl::raw::write(PROF_DUMP, ptr)
|
||||
.context(DumpProfileDataSnafu { path: path_buf })?
|
||||
}
|
||||
}
|
||||
|
||||
let mut f = tokio::fs::File::open(path.as_str())
|
||||
.await
|
||||
.context(OpenTempFileSnafu { path: &path })?;
|
||||
let mut buf = vec![];
|
||||
f.read_to_end(&mut buf)
|
||||
.await
|
||||
.context(OpenTempFileSnafu { path })?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
fn is_prof_enabled() -> error::Result<bool> {
|
||||
// safety: OPT_PROF variable, if present, is always a boolean value.
|
||||
Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
|
||||
}
|
||||
@@ -19,5 +19,5 @@ tokio.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util = { path = "../test-util" }
|
||||
futures-util.workspace = true
|
||||
tempdir = "0.3"
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_error::prelude::*;
|
||||
|
||||
@@ -81,6 +82,21 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Procedure panics, procedure_id: {}", procedure_id))]
|
||||
ProcedurePanic { procedure_id: ProcedureId },
|
||||
|
||||
#[snafu(display("Failed to wait watcher, source: {}", source))]
|
||||
WaitWatcher {
|
||||
source: tokio::sync::watch::error::RecvError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to execute procedure, source: {}", source))]
|
||||
ProcedureExec {
|
||||
source: Arc<Error>,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -95,10 +111,13 @@ impl ErrorExt for Error {
|
||||
| Error::ListState { .. }
|
||||
| Error::ReadState { .. }
|
||||
| Error::FromJson { .. }
|
||||
| Error::RetryLater { .. } => StatusCode::Internal,
|
||||
| Error::RetryLater { .. }
|
||||
| Error::WaitWatcher { .. } => StatusCode::Internal,
|
||||
Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
Error::ProcedurePanic { .. } => StatusCode::Unexpected,
|
||||
Error::ProcedureExec { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,9 +18,11 @@ pub mod error;
|
||||
pub mod local;
|
||||
mod procedure;
|
||||
mod store;
|
||||
pub mod watcher;
|
||||
|
||||
pub use crate::error::{Error, Result};
|
||||
pub use crate::procedure::{
|
||||
BoxedProcedure, Context, ContextProvider, LockKey, Procedure, ProcedureId, ProcedureManager,
|
||||
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, Watcher,
|
||||
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status,
|
||||
};
|
||||
pub use crate::watcher::Watcher;
|
||||
|
||||
@@ -334,7 +334,7 @@ impl LocalManager {
|
||||
|
||||
common_runtime::spawn_bg(async move {
|
||||
// Run the root procedure.
|
||||
let _ = runner.run().await;
|
||||
runner.run().await;
|
||||
});
|
||||
|
||||
Ok(watcher)
|
||||
@@ -409,9 +409,9 @@ impl ProcedureManager for LocalManager {
|
||||
/// Create a new [ProcedureMeta] for test purpose.
|
||||
#[cfg(test)]
|
||||
mod test_util {
|
||||
use common_test_util::temp_dir::TempDir;
|
||||
use object_store::services::Fs as Builder;
|
||||
use object_store::ObjectStoreBuilder;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -430,7 +430,7 @@ mod test_util {
|
||||
mod tests {
|
||||
use common_error::mock::MockError;
|
||||
use common_error::prelude::StatusCode;
|
||||
use tempdir::TempDir;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
@@ -447,9 +447,9 @@ mod tests {
|
||||
assert!(ctx.try_insert_procedure(meta.clone()));
|
||||
assert!(ctx.contains_procedure(meta.id));
|
||||
|
||||
assert_eq!(ProcedureState::Running, ctx.state(meta.id).unwrap());
|
||||
assert!(ctx.state(meta.id).unwrap().is_running());
|
||||
meta.set_state(ProcedureState::Done);
|
||||
assert_eq!(ProcedureState::Done, ctx.state(meta.id).unwrap());
|
||||
assert!(ctx.state(meta.id).unwrap().is_done());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -540,7 +540,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_register_loader() {
|
||||
let dir = TempDir::new("register").unwrap();
|
||||
let dir = create_temp_dir("register");
|
||||
let config = ManagerConfig {
|
||||
object_store: test_util::new_object_store(&dir),
|
||||
};
|
||||
@@ -558,7 +558,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_recover() {
|
||||
let dir = TempDir::new("recover").unwrap();
|
||||
let dir = create_temp_dir("recover");
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
let config = ManagerConfig {
|
||||
object_store: object_store.clone(),
|
||||
@@ -603,7 +603,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_submit_procedure() {
|
||||
let dir = TempDir::new("submit").unwrap();
|
||||
let dir = create_temp_dir("submit");
|
||||
let config = ManagerConfig {
|
||||
object_store: test_util::new_object_store(&dir),
|
||||
};
|
||||
@@ -634,7 +634,7 @@ mod tests {
|
||||
// Wait for the procedure done.
|
||||
let mut watcher = manager.procedure_watcher(procedure_id).unwrap();
|
||||
watcher.changed().await.unwrap();
|
||||
assert_eq!(ProcedureState::Done, *watcher.borrow());
|
||||
assert!(watcher.borrow().is_done());
|
||||
|
||||
// Try to submit procedure with same id again.
|
||||
let err = manager
|
||||
@@ -649,7 +649,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_state_changed_on_err() {
|
||||
let dir = TempDir::new("on_err").unwrap();
|
||||
let dir = create_temp_dir("on_err");
|
||||
let config = ManagerConfig {
|
||||
object_store: test_util::new_object_store(&dir),
|
||||
};
|
||||
@@ -697,7 +697,7 @@ mod tests {
|
||||
.unwrap();
|
||||
// Wait for the notification.
|
||||
watcher.changed().await.unwrap();
|
||||
assert_eq!(ProcedureState::Failed, *watcher.borrow());
|
||||
assert!(watcher.borrow().is_failed());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::time::Duration;
|
||||
use common_telemetry::logging;
|
||||
use tokio::time;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::error::{ProcedurePanicSnafu, Result};
|
||||
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
|
||||
use crate::store::ProcedureStore;
|
||||
use crate::{BoxedProcedure, Context, ProcedureId, ProcedureState, ProcedureWithId, Status};
|
||||
@@ -30,7 +30,7 @@ enum ExecResult {
|
||||
Continue,
|
||||
Done,
|
||||
RetryLater,
|
||||
Failed(Error),
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -48,7 +48,7 @@ impl ExecResult {
|
||||
}
|
||||
|
||||
fn is_failed(&self) -> bool {
|
||||
matches!(self, ExecResult::Failed(_))
|
||||
matches!(self, ExecResult::Failed)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,7 +83,11 @@ impl Drop for ProcedureGuard {
|
||||
// Set state to failed. This is useful in test as runtime may not abort when the runner task panics.
|
||||
// See https://github.com/tokio-rs/tokio/issues/2002 .
|
||||
// We set set_panic_hook() in the application's main function. But our tests don't have this panic hook.
|
||||
self.meta.set_state(ProcedureState::Failed);
|
||||
let err = ProcedurePanicSnafu {
|
||||
procedure_id: self.meta.id,
|
||||
}
|
||||
.build();
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(err)));
|
||||
}
|
||||
|
||||
// Notify parent procedure.
|
||||
@@ -109,7 +113,7 @@ pub(crate) struct Runner {
|
||||
|
||||
impl Runner {
|
||||
/// Run the procedure.
|
||||
pub(crate) async fn run(mut self) -> Result<()> {
|
||||
pub(crate) async fn run(mut self) {
|
||||
// Ensure we can update the procedure state.
|
||||
let guard = ProcedureGuard::new(self.meta.clone(), self.manager_ctx.clone());
|
||||
|
||||
@@ -129,12 +133,9 @@ impl Runner {
|
||||
.await;
|
||||
}
|
||||
|
||||
let mut result = Ok(());
|
||||
// Execute the procedure. We need to release the lock whenever the the execution
|
||||
// is successful or fail.
|
||||
if let Err(e) = self.execute_procedure_in_loop().await {
|
||||
result = Err(e);
|
||||
}
|
||||
self.execute_procedure_in_loop().await;
|
||||
|
||||
// We can't remove the metadata of the procedure now as users and its parent might
|
||||
// need to query its state.
|
||||
@@ -155,11 +156,9 @@ impl Runner {
|
||||
self.procedure.type_name(),
|
||||
self.meta.id
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
async fn execute_procedure_in_loop(&mut self) -> Result<()> {
|
||||
async fn execute_procedure_in_loop(&mut self) {
|
||||
let ctx = Context {
|
||||
procedure_id: self.meta.id,
|
||||
provider: self.manager_ctx.clone(),
|
||||
@@ -168,11 +167,10 @@ impl Runner {
|
||||
loop {
|
||||
match self.execute_once(&ctx).await {
|
||||
ExecResult::Continue => (),
|
||||
ExecResult::Done => return Ok(()),
|
||||
ExecResult::Done | ExecResult::Failed => return,
|
||||
ExecResult::RetryLater => {
|
||||
self.wait_on_err().await;
|
||||
}
|
||||
ExecResult::Failed(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -222,14 +220,14 @@ impl Runner {
|
||||
return ExecResult::RetryLater;
|
||||
}
|
||||
|
||||
self.meta.set_state(ProcedureState::Failed);
|
||||
self.meta.set_state(ProcedureState::failed(Arc::new(e)));
|
||||
|
||||
// Write rollback key so we can skip this procedure while recovering procedures.
|
||||
if self.rollback_procedure().await.is_err() {
|
||||
return ExecResult::RetryLater;
|
||||
}
|
||||
|
||||
ExecResult::Failed(e)
|
||||
ExecResult::Failed
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -397,14 +395,14 @@ mod tests {
|
||||
use common_error::ext::PlainError;
|
||||
use common_error::mock::MockError;
|
||||
use common_error::prelude::StatusCode;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use futures_util::future::BoxFuture;
|
||||
use futures_util::{FutureExt, TryStreamExt};
|
||||
use object_store::ObjectStore;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::local::test_util;
|
||||
use crate::{ContextProvider, LockKey, Procedure};
|
||||
use crate::{ContextProvider, Error, LockKey, Procedure};
|
||||
|
||||
const ROOT_ID: &str = "9f805a1f-05f7-490c-9f91-bd56e3cc54c1";
|
||||
|
||||
@@ -513,7 +511,7 @@ mod tests {
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
let dir = TempDir::new("normal").unwrap();
|
||||
let dir = create_temp_dir("normal");
|
||||
let meta = normal.new_meta(ROOT_ID);
|
||||
let ctx = context_without_provider(meta.id);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
@@ -561,7 +559,7 @@ mod tests {
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
let dir = TempDir::new("suspend").unwrap();
|
||||
let dir = create_temp_dir("suspend");
|
||||
let meta = suspend.new_meta(ROOT_ID);
|
||||
let ctx = context_without_provider(meta.id);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
@@ -630,9 +628,14 @@ mod tests {
|
||||
// Wait for subprocedures.
|
||||
let mut all_child_done = true;
|
||||
for id in children_ids {
|
||||
if ctx.provider.procedure_state(id).await.unwrap()
|
||||
!= Some(ProcedureState::Done)
|
||||
{
|
||||
let is_not_done = ctx
|
||||
.provider
|
||||
.procedure_state(id)
|
||||
.await
|
||||
.unwrap()
|
||||
.map(|s| !s.is_done())
|
||||
.unwrap_or(true);
|
||||
if is_not_done {
|
||||
all_child_done = false;
|
||||
}
|
||||
}
|
||||
@@ -655,7 +658,7 @@ mod tests {
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
let dir = TempDir::new("parent").unwrap();
|
||||
let dir = create_temp_dir("parent");
|
||||
let meta = parent.new_meta(ROOT_ID);
|
||||
let procedure_id = meta.id;
|
||||
|
||||
@@ -668,7 +671,7 @@ mod tests {
|
||||
// Replace the manager ctx.
|
||||
runner.manager_ctx = manager_ctx;
|
||||
|
||||
runner.run().await.unwrap();
|
||||
runner.run().await;
|
||||
|
||||
// Check files on store.
|
||||
for child_id in children_ids {
|
||||
@@ -697,7 +700,7 @@ mod tests {
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
let dir = TempDir::new("fail").unwrap();
|
||||
let dir = create_temp_dir("fail");
|
||||
let meta = fail.new_meta(ROOT_ID);
|
||||
let ctx = context_without_provider(meta.id);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
@@ -706,7 +709,7 @@ mod tests {
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_failed(), "{res:?}");
|
||||
assert_eq!(ProcedureState::Failed, meta.state());
|
||||
assert!(meta.state().is_failed());
|
||||
check_files(&object_store, ctx.procedure_id, &["0000000000.rollback"]).await;
|
||||
}
|
||||
|
||||
@@ -732,7 +735,7 @@ mod tests {
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
let dir = TempDir::new("retry_later").unwrap();
|
||||
let dir = create_temp_dir("retry_later");
|
||||
let meta = retry_later.new_meta(ROOT_ID);
|
||||
let ctx = context_without_provider(meta.id);
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
@@ -741,11 +744,11 @@ mod tests {
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_retry_later(), "{res:?}");
|
||||
assert_eq!(ProcedureState::Running, meta.state());
|
||||
assert!(meta.state().is_running());
|
||||
|
||||
let res = runner.execute_once(&ctx).await;
|
||||
assert!(res.is_done(), "{res:?}");
|
||||
assert_eq!(ProcedureState::Done, meta.state());
|
||||
assert!(meta.state().is_done());
|
||||
check_files(&object_store, ctx.procedure_id, &["0000000000.commit"]).await;
|
||||
}
|
||||
|
||||
@@ -779,7 +782,8 @@ mod tests {
|
||||
} else {
|
||||
// Wait for subprocedures.
|
||||
let state = ctx.provider.procedure_state(child_id).await.unwrap();
|
||||
if state == Some(ProcedureState::Failed) {
|
||||
let is_failed = state.map(|s| s.is_failed()).unwrap_or(false);
|
||||
if is_failed {
|
||||
// The parent procedure to abort itself if child procedure is failed.
|
||||
Err(Error::from_error_ext(PlainError::new(
|
||||
"subprocedure failed".to_string(),
|
||||
@@ -802,7 +806,7 @@ mod tests {
|
||||
exec_fn,
|
||||
};
|
||||
|
||||
let dir = TempDir::new("child_err").unwrap();
|
||||
let dir = create_temp_dir("child_err");
|
||||
let meta = parent.new_meta(ROOT_ID);
|
||||
|
||||
let object_store = test_util::new_object_store(&dir);
|
||||
@@ -811,12 +815,13 @@ mod tests {
|
||||
|
||||
let manager_ctx = Arc::new(ManagerContext::new());
|
||||
// Manually add this procedure to the manager ctx.
|
||||
assert!(manager_ctx.try_insert_procedure(meta));
|
||||
assert!(manager_ctx.try_insert_procedure(meta.clone()));
|
||||
// Replace the manager ctx.
|
||||
runner.manager_ctx = manager_ctx;
|
||||
|
||||
// Run the runer and execute the procedure.
|
||||
let err = runner.run().await.unwrap_err();
|
||||
assert!(err.to_string().contains("subprocedure failed"), "{err}");
|
||||
runner.run().await;
|
||||
let err = meta.state().error().unwrap().to_string();
|
||||
assert!(err.contains("subprocedure failed"), "{err}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,10 +20,10 @@ use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use tokio::sync::watch::Receiver;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::error::{Error, Result};
|
||||
use crate::watcher::Watcher;
|
||||
|
||||
/// Procedure execution status.
|
||||
#[derive(Debug)]
|
||||
@@ -198,20 +198,47 @@ impl FromStr for ProcedureId {
|
||||
/// Loader to recover the [Procedure] instance from serialized data.
|
||||
pub type BoxedProcedureLoader = Box<dyn Fn(&str) -> Result<BoxedProcedure> + Send>;
|
||||
|
||||
// TODO(yingwen): Find a way to return the error message if the procedure is failed.
|
||||
/// State of a submitted procedure.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub enum ProcedureState {
|
||||
/// The procedure is running.
|
||||
#[default]
|
||||
Running,
|
||||
/// The procedure is finished.
|
||||
Done,
|
||||
/// The procedure is failed and cannot proceed anymore.
|
||||
Failed,
|
||||
Failed { error: Arc<Error> },
|
||||
}
|
||||
|
||||
/// Watcher to watch procedure state.
|
||||
pub type Watcher = Receiver<ProcedureState>;
|
||||
impl ProcedureState {
|
||||
/// Returns a [ProcedureState] with failed state.
|
||||
pub fn failed(error: Arc<Error>) -> ProcedureState {
|
||||
ProcedureState::Failed { error }
|
||||
}
|
||||
|
||||
/// Returns true if the procedure state is running.
|
||||
pub fn is_running(&self) -> bool {
|
||||
matches!(self, ProcedureState::Running)
|
||||
}
|
||||
|
||||
/// Returns true if the procedure state is done.
|
||||
pub fn is_done(&self) -> bool {
|
||||
matches!(self, ProcedureState::Done)
|
||||
}
|
||||
|
||||
/// Returns true if the procedure state failed.
|
||||
pub fn is_failed(&self) -> bool {
|
||||
matches!(self, ProcedureState::Failed { .. })
|
||||
}
|
||||
|
||||
/// Returns the error.
|
||||
pub fn error(&self) -> Option<&Arc<Error>> {
|
||||
match self {
|
||||
ProcedureState::Failed { error } => Some(error),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(yingwen): Shutdown
|
||||
/// `ProcedureManager` executes [Procedure] submitted to it.
|
||||
@@ -244,6 +271,9 @@ pub type ProcedureManagerRef = Arc<dyn ProcedureManager>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_error::mock::MockError;
|
||||
use common_error::prelude::StatusCode;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -311,4 +341,17 @@ mod tests {
|
||||
let parsed = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(id, parsed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_procedure_state() {
|
||||
assert!(ProcedureState::Running.is_running());
|
||||
assert!(ProcedureState::Running.error().is_none());
|
||||
assert!(ProcedureState::Done.is_done());
|
||||
|
||||
let state = ProcedureState::failed(Arc::new(Error::external(MockError::new(
|
||||
StatusCode::Unexpected,
|
||||
))));
|
||||
assert!(state.is_failed());
|
||||
assert!(state.error().is_some());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -246,9 +246,9 @@ impl ParsedKey {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use async_trait::async_trait;
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use object_store::services::Fs as Builder;
|
||||
use object_store::ObjectStoreBuilder;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::{Context, LockKey, Procedure, Status};
|
||||
@@ -373,7 +373,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_store_procedure() {
|
||||
let dir = TempDir::new("store_procedure").unwrap();
|
||||
let dir = create_temp_dir("store_procedure");
|
||||
let store = procedure_store_for_test(&dir);
|
||||
|
||||
let procedure_id = ProcedureId::random();
|
||||
@@ -398,7 +398,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_commit_procedure() {
|
||||
let dir = TempDir::new("commit_procedure").unwrap();
|
||||
let dir = create_temp_dir("commit_procedure");
|
||||
let store = procedure_store_for_test(&dir);
|
||||
|
||||
let procedure_id = ProcedureId::random();
|
||||
@@ -416,7 +416,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rollback_procedure() {
|
||||
let dir = TempDir::new("rollback_procedure").unwrap();
|
||||
let dir = create_temp_dir("rollback_procedure");
|
||||
let store = procedure_store_for_test(&dir);
|
||||
|
||||
let procedure_id = ProcedureId::random();
|
||||
@@ -434,7 +434,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_load_messages() {
|
||||
let dir = TempDir::new("load_messages").unwrap();
|
||||
let dir = create_temp_dir("load_messages");
|
||||
let store = procedure_store_for_test(&dir);
|
||||
|
||||
// store 3 steps
|
||||
|
||||
@@ -115,15 +115,15 @@ impl StateStore for ObjectStateStore {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use object_store::services::Fs as Builder;
|
||||
use object_store::ObjectStoreBuilder;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_object_state_store() {
|
||||
let dir = TempDir::new("state_store").unwrap();
|
||||
let dir = create_temp_dir("state_store");
|
||||
let store_dir = dir.path().to_str().unwrap();
|
||||
let accessor = Builder::default().root(store_dir).build().unwrap();
|
||||
let object_store = ObjectStore::new(accessor).finish();
|
||||
|
||||
38
src/common/procedure/src/watcher.rs
Normal file
38
src/common/procedure/src/watcher.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::ResultExt;
|
||||
use tokio::sync::watch::Receiver;
|
||||
|
||||
use crate::error::{ProcedureExecSnafu, Result, WaitWatcherSnafu};
|
||||
use crate::procedure::ProcedureState;
|
||||
|
||||
/// Watcher to watch procedure state.
|
||||
pub type Watcher = Receiver<ProcedureState>;
|
||||
|
||||
/// Wait the [Watcher] until the [ProcedureState] is done.
|
||||
pub async fn wait(watcher: &mut Watcher) -> Result<()> {
|
||||
loop {
|
||||
watcher.changed().await.context(WaitWatcherSnafu)?;
|
||||
match &*watcher.borrow() {
|
||||
ProcedureState::Running => (),
|
||||
ProcedureState::Done => {
|
||||
return Ok(());
|
||||
}
|
||||
ProcedureState::Failed { error } => {
|
||||
return Err(error.clone()).context(ProcedureExecSnafu);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -239,7 +239,6 @@ impl From<BoxedError> for Error {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
use snafu::GenerateImplicitData;
|
||||
|
||||
use super::*;
|
||||
@@ -286,7 +285,7 @@ mod tests {
|
||||
fn test_convert_df_recordbatch_stream_error() {
|
||||
let result: std::result::Result<i32, common_recordbatch::error::Error> =
|
||||
Err(common_recordbatch::error::Error::PollStream {
|
||||
source: ArrowError::DivideByZero,
|
||||
source: DataFusionError::Internal("blabla".to_string()),
|
||||
backtrace: Backtrace::generate(),
|
||||
});
|
||||
let error = result
|
||||
|
||||
@@ -315,7 +315,11 @@ mod test {
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
|
||||
let physical_plan = ctx
|
||||
.state()
|
||||
.create_physical_plan(&logical_plan)
|
||||
.await
|
||||
.unwrap();
|
||||
let df_recordbatches = collect(physical_plan, Arc::new(TaskContext::from(&ctx)))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -18,9 +18,9 @@ use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
use futures::ready;
|
||||
use snafu::ResultExt;
|
||||
@@ -57,14 +57,14 @@ impl DfRecordBatchStream for DfRecordBatchStreamAdapter {
|
||||
}
|
||||
|
||||
impl Stream for DfRecordBatchStreamAdapter {
|
||||
type Item = ArrowResult<DfRecordBatch>;
|
||||
type Item = DfResult<DfRecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
match Pin::new(&mut self.stream).poll_next(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Some(recordbatch)) => match recordbatch {
|
||||
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.into_df_record_batch()))),
|
||||
Err(e) => Poll::Ready(Some(Err(ArrowError::ExternalError(Box::new(e))))),
|
||||
Err(e) => Poll::Ready(Some(Err(DataFusionError::External(Box::new(e))))),
|
||||
},
|
||||
Poll::Ready(None) => Poll::Ready(None),
|
||||
}
|
||||
@@ -242,12 +242,12 @@ mod test {
|
||||
)]));
|
||||
let batch1 = RecordBatch::new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Vector::from_slice(&[1])) as _],
|
||||
vec![Arc::new(Int32Vector::from_slice([1])) as _],
|
||||
)
|
||||
.unwrap();
|
||||
let batch2 = RecordBatch::new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(Int32Vector::from_slice(&[2])) as _],
|
||||
vec![Arc::new(Int32Vector::from_slice([2])) as _],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Failed to poll stream, source: {}", source))]
|
||||
PollStream {
|
||||
source: datatypes::arrow::error::ArrowError,
|
||||
source: datafusion::error::DataFusionError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
|
||||
@@ -204,7 +204,7 @@ mod tests {
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
let v: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
|
||||
let v: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
|
||||
let expected = vec![RecordBatch::new(schema.clone(), vec![v.clone()]).unwrap()];
|
||||
let r = RecordBatches::try_from_columns(schema, vec![v]).unwrap();
|
||||
assert_eq!(r.take(), expected);
|
||||
@@ -216,7 +216,7 @@ mod tests {
|
||||
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
|
||||
let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
|
||||
|
||||
let va: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
|
||||
let va: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
|
||||
let vb: VectorRef = Arc::new(StringVector::from(vec!["hello", "world"]));
|
||||
let vc: VectorRef = Arc::new(BooleanVector::from(vec![true, false]));
|
||||
|
||||
@@ -255,11 +255,11 @@ mod tests {
|
||||
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
|
||||
let schema = Arc::new(Schema::new(vec![column_a, column_b]));
|
||||
|
||||
let va1: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
|
||||
let va1: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
|
||||
let vb1: VectorRef = Arc::new(StringVector::from(vec!["a", "b"]));
|
||||
let batch1 = RecordBatch::new(schema.clone(), vec![va1, vb1]).unwrap();
|
||||
|
||||
let va2: VectorRef = Arc::new(Int32Vector::from_slice(&[3, 4, 5]));
|
||||
let va2: VectorRef = Arc::new(Int32Vector::from_slice([3, 4, 5]));
|
||||
let vb2: VectorRef = Arc::new(StringVector::from(vec!["c", "d", "e"]));
|
||||
let batch2 = RecordBatch::new(schema.clone(), vec![va2, vb2]).unwrap();
|
||||
|
||||
|
||||
@@ -189,8 +189,8 @@ mod tests {
|
||||
]));
|
||||
let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());
|
||||
|
||||
let c1 = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
|
||||
let c2 = Arc::new(UInt32Vector::from_slice(&[4, 5, 6]));
|
||||
let c1 = Arc::new(UInt32Vector::from_slice([1, 2, 3]));
|
||||
let c2 = Arc::new(UInt32Vector::from_slice([4, 5, 6]));
|
||||
let columns: Vec<VectorRef> = vec![c1, c2];
|
||||
|
||||
let batch = RecordBatch::new(schema.clone(), columns.clone()).unwrap();
|
||||
@@ -222,7 +222,7 @@ mod tests {
|
||||
let schema = Arc::new(Schema::try_new(column_schemas).unwrap());
|
||||
|
||||
let numbers: Vec<u32> = (0..10).collect();
|
||||
let columns = vec![Arc::new(UInt32Vector::from_slice(&numbers)) as VectorRef];
|
||||
let columns = vec![Arc::new(UInt32Vector::from_slice(numbers)) as VectorRef];
|
||||
let batch = RecordBatch::new(schema, columns).unwrap();
|
||||
|
||||
let output = serde_json::to_string(&batch).unwrap();
|
||||
|
||||
@@ -5,6 +5,8 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-recursion = "1.0"
|
||||
async-trait.workspace = true
|
||||
bytes = "1.1"
|
||||
catalog = { path = "../../catalog" }
|
||||
common-catalog = { path = "../catalog" }
|
||||
@@ -15,6 +17,7 @@ datafusion-expr.workspace = true
|
||||
datatypes = { path = "../../datatypes" }
|
||||
futures = "0.3"
|
||||
prost.workspace = true
|
||||
session = { path = "../../session" }
|
||||
snafu.workspace = true
|
||||
table = { path = "../../table" }
|
||||
|
||||
|
||||
@@ -635,8 +635,6 @@ mod utils {
|
||||
Operator::Modulo => "modulo",
|
||||
Operator::And => "and",
|
||||
Operator::Or => "or",
|
||||
Operator::Like => "like",
|
||||
Operator::NotLike => "not_like",
|
||||
Operator::IsDistinctFrom => "is_distinct_from",
|
||||
Operator::IsNotDistinctFrom => "is_not_distinct_from",
|
||||
Operator::RegexMatch => "regex_match",
|
||||
@@ -649,8 +647,6 @@ mod utils {
|
||||
Operator::BitwiseShiftRight => "bitwise_shift_right",
|
||||
Operator::BitwiseShiftLeft => "bitwise_shift_left",
|
||||
Operator::StringConcat => "string_concat",
|
||||
Operator::ILike => "i_like",
|
||||
Operator::NotILike => "not_i_like",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,16 +14,20 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_recursion::async_recursion;
|
||||
use async_trait::async_trait;
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use catalog::table_source::DfTableSourceProvider;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use datafusion::common::{DFField, DFSchema};
|
||||
use datafusion::common::{DFField, DFSchema, OwnedTableReference};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::physical_plan::project_schema;
|
||||
use datafusion_expr::{Filter, LogicalPlan, TableScan, TableSource};
|
||||
use datafusion_expr::{Filter, LogicalPlan, TableScan};
|
||||
use prost::Message;
|
||||
use session::context::QueryContext;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use substrait_proto::proto::expression::mask_expression::{StructItem, StructSelect};
|
||||
use substrait_proto::proto::expression::MaskExpression;
|
||||
@@ -37,8 +41,8 @@ use table::table::adapter::DfTableProviderAdapter;
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::df_expr::{expression_from_df_expr, to_df_expr};
|
||||
use crate::error::{
|
||||
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
|
||||
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
|
||||
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error,
|
||||
InvalidParametersSnafu, MissingFieldSnafu, ResolveTableSnafu, SchemaNotMatchSnafu,
|
||||
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
|
||||
};
|
||||
use crate::schema::{from_schema, to_schema};
|
||||
@@ -46,18 +50,19 @@ use crate::SubstraitPlan;
|
||||
|
||||
pub struct DFLogicalSubstraitConvertor;
|
||||
|
||||
#[async_trait]
|
||||
impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
type Error = Error;
|
||||
|
||||
type Plan = LogicalPlan;
|
||||
|
||||
fn decode<B: Buf + Send>(
|
||||
async fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
message: B,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<Self::Plan, Self::Error> {
|
||||
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
|
||||
self.convert_plan(plan, catalog_manager)
|
||||
self.convert_plan(plan, catalog_manager).await
|
||||
}
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
|
||||
@@ -71,7 +76,7 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
fn convert_plan(
|
||||
async fn convert_plan(
|
||||
&self,
|
||||
mut plan: Plan,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
@@ -102,20 +107,25 @@ impl DFLogicalSubstraitConvertor {
|
||||
.fail()?
|
||||
};
|
||||
|
||||
self.rel_to_logical_plan(&mut ctx, Box::new(rel), catalog_manager)
|
||||
// TODO(LFC): Create table provider from outside, respect "disallow_cross_schema_query" option in query engine state.
|
||||
let mut table_provider =
|
||||
DfTableSourceProvider::new(catalog_manager, false, &QueryContext::new());
|
||||
self.rel_to_logical_plan(&mut ctx, Box::new(rel), &mut table_provider)
|
||||
.await
|
||||
}
|
||||
|
||||
fn rel_to_logical_plan(
|
||||
#[async_recursion]
|
||||
async fn rel_to_logical_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
rel: Box<Rel>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_provider: &mut DfTableSourceProvider,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
|
||||
|
||||
// build logical plan
|
||||
let logical_plan = match rel_type {
|
||||
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, catalog_manager)?,
|
||||
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, table_provider).await?,
|
||||
RelType::Filter(filter) => {
|
||||
let FilterRel {
|
||||
common: _,
|
||||
@@ -128,7 +138,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
field: "input",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
let input = Arc::new(self.rel_to_logical_plan(ctx, input, catalog_manager)?);
|
||||
let input = Arc::new(self.rel_to_logical_plan(ctx, input, table_provider).await?);
|
||||
|
||||
let condition = condition.context(MissingFieldSnafu {
|
||||
field: "condition",
|
||||
@@ -191,11 +201,11 @@ impl DFLogicalSubstraitConvertor {
|
||||
Ok(logical_plan)
|
||||
}
|
||||
|
||||
fn convert_read_rel(
|
||||
async fn convert_read_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
read_rel: Box<ReadRel>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_provider: &mut DfTableSourceProvider,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
|
||||
let read_type = read_rel.read_type.context(MissingFieldSnafu {
|
||||
@@ -230,17 +240,17 @@ impl DFLogicalSubstraitConvertor {
|
||||
.projection
|
||||
.map(|mask_expr| self.convert_mask_expression(mask_expr));
|
||||
|
||||
// Get table handle from catalog manager
|
||||
let table_ref = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?
|
||||
.context(TableNotFoundSnafu {
|
||||
name: format!("{catalog_name}.{schema_name}.{table_name}"),
|
||||
let table_ref = OwnedTableReference::Full {
|
||||
catalog: catalog_name.clone(),
|
||||
schema: schema_name.clone(),
|
||||
table: table_name.clone(),
|
||||
};
|
||||
let adapter = table_provider
|
||||
.resolve_table(table_ref)
|
||||
.await
|
||||
.with_context(|_| ResolveTableSnafu {
|
||||
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
|
||||
})?;
|
||||
let adapter = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(table_ref),
|
||||
)));
|
||||
|
||||
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
|
||||
let stored_schema = adapter.schema();
|
||||
@@ -262,7 +272,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
};
|
||||
|
||||
// Calculate the projected schema
|
||||
let qualified = &format!("{catalog_name}.{schema_name}.{table_name}");
|
||||
let qualified = &format_full_table_name(&catalog_name, &schema_name, &table_name);
|
||||
let projected_schema = Arc::new(
|
||||
project_schema(&stored_schema, projection.as_ref())
|
||||
.and_then(|x| {
|
||||
@@ -281,7 +291,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
|
||||
// TODO(ruihang): Support limit(fetch)
|
||||
Ok(LogicalPlan::TableScan(TableScan {
|
||||
table_name: format!("{catalog_name}.{schema_name}.{table_name}"),
|
||||
table_name: qualified.to_string(),
|
||||
source: adapter,
|
||||
projection,
|
||||
projected_schema,
|
||||
@@ -314,7 +324,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
.fail()?,
|
||||
LogicalPlan::Filter(filter) => {
|
||||
let input = Some(Box::new(
|
||||
self.logical_plan_to_rel(ctx, filter.input().clone())?,
|
||||
self.logical_plan_to_rel(ctx, filter.input.clone())?,
|
||||
));
|
||||
|
||||
let schema = plan
|
||||
@@ -324,7 +334,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let condition = Some(Box::new(expression_from_df_expr(
|
||||
ctx,
|
||||
filter.predicate(),
|
||||
&filter.predicate,
|
||||
&schema,
|
||||
)?));
|
||||
|
||||
@@ -396,7 +406,10 @@ impl DFLogicalSubstraitConvertor {
|
||||
| LogicalPlan::Explain(_)
|
||||
| LogicalPlan::Analyze(_)
|
||||
| LogicalPlan::Extension(_)
|
||||
| LogicalPlan::Prepare(_) => InvalidParametersSnafu {
|
||||
| LogicalPlan::Prepare(_)
|
||||
| LogicalPlan::Dml(_)
|
||||
| LogicalPlan::DescribeTable(_)
|
||||
| LogicalPlan::Unnest(_) => InvalidParametersSnafu {
|
||||
reason: format!(
|
||||
"Trying to convert DDL/DML plan to substrait proto, plan: {plan:?}",
|
||||
),
|
||||
@@ -524,6 +537,7 @@ mod test {
|
||||
use catalog::{CatalogList, CatalogProvider, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use datafusion::common::{DFSchema, ToDFSchema};
|
||||
use datafusion_expr::TableSource;
|
||||
use datatypes::schema::RawSchema;
|
||||
use table::requests::CreateTableRequest;
|
||||
use table::test_util::{EmptyTable, MockTableEngine};
|
||||
@@ -572,7 +586,7 @@ mod test {
|
||||
let convertor = DFLogicalSubstraitConvertor;
|
||||
|
||||
let proto = convertor.encode(plan.clone()).unwrap();
|
||||
let tripped_plan = convertor.decode(proto, catalog).unwrap();
|
||||
let tripped_plan = convertor.decode(proto, catalog).await.unwrap();
|
||||
|
||||
assert_eq!(format!("{plan:?}"), format!("{tripped_plan:?}"));
|
||||
}
|
||||
|
||||
@@ -105,6 +105,13 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Unable to resolve table: {table_name}, error: {source}"))]
|
||||
ResolveTable {
|
||||
table_name: String,
|
||||
#[snafu(backtrace)]
|
||||
source: catalog::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -127,6 +134,7 @@ impl ErrorExt for Error {
|
||||
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
|
||||
Error::ConvertDfSchema { source } => source.status_code(),
|
||||
Error::ResolveTable { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(let_chains)]
|
||||
#![feature(trait_upcasting)]
|
||||
|
||||
mod context;
|
||||
mod df_expr;
|
||||
@@ -21,17 +22,19 @@ pub mod error;
|
||||
mod schema;
|
||||
mod types;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{Buf, Bytes};
|
||||
use catalog::CatalogManagerRef;
|
||||
|
||||
pub use crate::df_logical::DFLogicalSubstraitConvertor;
|
||||
|
||||
#[async_trait]
|
||||
pub trait SubstraitPlan {
|
||||
type Error: std::error::Error;
|
||||
|
||||
type Plan;
|
||||
|
||||
fn decode<B: Buf + Send>(
|
||||
async fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
message: B,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
|
||||
8
src/common/test-util/Cargo.toml
Normal file
8
src/common/test-util/Cargo.toml
Normal file
@@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "common-test-util"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile.workspace = true
|
||||
@@ -12,4 +12,4 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! GreptimeDB builtin functions
|
||||
pub mod temp_dir;
|
||||
23
src/common/test-util/src/temp_dir.rs
Normal file
23
src/common/test-util/src/temp_dir.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub use tempfile::{NamedTempFile, TempDir};
|
||||
|
||||
pub fn create_temp_dir(prefix: &str) -> TempDir {
|
||||
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
|
||||
}
|
||||
|
||||
pub fn create_named_temp_file() -> NamedTempFile {
|
||||
NamedTempFile::new().unwrap()
|
||||
}
|
||||
@@ -9,6 +9,7 @@ default = ["python"]
|
||||
python = ["dep:script"]
|
||||
|
||||
[dependencies]
|
||||
async-compat = "0.2"
|
||||
async-stream.workspace = true
|
||||
async-trait.workspace = true
|
||||
api = { path = "../api" }
|
||||
@@ -28,9 +29,11 @@ common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
humantime-serde = "1.1"
|
||||
log-store = { path = "../log-store" }
|
||||
@@ -42,6 +45,7 @@ object-store = { path = "../object-store" }
|
||||
pin-project = "1.0"
|
||||
prost.workspace = true
|
||||
query = { path = "../query" }
|
||||
regex = "1.6"
|
||||
script = { path = "../script", features = ["python"], optional = true }
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
@@ -59,11 +63,12 @@ tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tonic.workspace = true
|
||||
tower = { version = "0.4", features = ["full"] }
|
||||
tower-http = { version = "0.3", features = ["full"] }
|
||||
url = "2.3.1"
|
||||
|
||||
[dev-dependencies]
|
||||
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
|
||||
client = { path = "../client" }
|
||||
common-test-util = { path = "../common/test-util" }
|
||||
common-query = { path = "../common/query" }
|
||||
datafusion-common.workspace = true
|
||||
tempdir = "0.3"
|
||||
toml = "0.5"
|
||||
|
||||
@@ -21,6 +21,7 @@ use datafusion::parquet;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use storage::error::Error as StorageError;
|
||||
use table::error::Error as TableError;
|
||||
use url::ParseError;
|
||||
|
||||
use crate::datanode::ObjectStoreConfig;
|
||||
|
||||
@@ -190,6 +191,12 @@ pub enum Error {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build backend, source: {}", source))]
|
||||
BuildBackend {
|
||||
source: object_store::Error,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Runtime resource error, source: {}", source))]
|
||||
RuntimeResource {
|
||||
#[snafu(backtrace)]
|
||||
@@ -199,6 +206,30 @@ pub enum Error {
|
||||
#[snafu(display("Invalid SQL, error: {}", msg))]
|
||||
InvalidSql { msg: String },
|
||||
|
||||
#[snafu(display("Invalid url: {}, error :{}", url, source))]
|
||||
InvalidUrl { url: String, source: ParseError },
|
||||
|
||||
#[snafu(display("Invalid filepath: {}", path))]
|
||||
InvalidPath { path: String },
|
||||
|
||||
#[snafu(display("Invalid connection: {}", msg))]
|
||||
InvalidConnection { msg: String },
|
||||
|
||||
#[snafu(display("Unsupported backend protocol: {}", protocol))]
|
||||
UnsupportedBackendProtocol { protocol: String },
|
||||
|
||||
#[snafu(display("Failed to regex, source: {}", source))]
|
||||
BuildRegex {
|
||||
backtrace: Backtrace,
|
||||
source: regex::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse the data, source: {}", source))]
|
||||
ParseDataTypes {
|
||||
#[snafu(backtrace)]
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Not support SQL, error: {}", msg))]
|
||||
NotSupportSql { msg: String },
|
||||
|
||||
@@ -371,6 +402,22 @@ pub enum Error {
|
||||
source: common_query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"File Schema mismatch, expected table schema: {} but found :{}",
|
||||
table_schema,
|
||||
file_schema
|
||||
))]
|
||||
InvalidSchema {
|
||||
table_schema: String,
|
||||
file_schema: String,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read parquet file, source: {}", source))]
|
||||
ReadParquet {
|
||||
source: parquet::errors::ParquetError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to write parquet file, source: {}", source))]
|
||||
WriteParquet {
|
||||
source: parquet::errors::ParquetError,
|
||||
@@ -379,10 +426,23 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Failed to poll stream, source: {}", source))]
|
||||
PollStream {
|
||||
source: datatypes::arrow::error::ArrowError,
|
||||
source: datafusion_common::DataFusionError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build parquet record batch stream, source: {}", source))]
|
||||
BuildParquetRecordBatchStream {
|
||||
backtrace: Backtrace,
|
||||
source: parquet::errors::ParquetError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to read object in path: {}, source: {}", path, source))]
|
||||
ReadObject {
|
||||
path: String,
|
||||
backtrace: Backtrace,
|
||||
source: object_store::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to write object into path: {}, source: {}", path, source))]
|
||||
WriteObject {
|
||||
path: String,
|
||||
@@ -390,6 +450,13 @@ pub enum Error {
|
||||
source: object_store::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to lists object in path: {}, source: {}", path, source))]
|
||||
ListObjects {
|
||||
path: String,
|
||||
backtrace: Backtrace,
|
||||
source: object_store::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Unrecognized table option: {}", source))]
|
||||
UnrecognizedTableOption {
|
||||
#[snafu(backtrace)]
|
||||
@@ -402,23 +469,18 @@ pub enum Error {
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to submit procedure, source: {}", source))]
|
||||
#[snafu(display("Failed to submit procedure {}, source: {}", procedure_id, source))]
|
||||
SubmitProcedure {
|
||||
procedure_id: ProcedureId,
|
||||
#[snafu(backtrace)]
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to wait procedure done, source: {}", source))]
|
||||
#[snafu(display("Failed to wait procedure {} done, source: {}", procedure_id, source))]
|
||||
WaitProcedure {
|
||||
source: tokio::sync::watch::error::RecvError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
// TODO(yingwen): Use procedure's error.
|
||||
#[snafu(display("Failed to execute procedure, procedure_id: {}", procedure_id))]
|
||||
ProcedureExec {
|
||||
procedure_id: ProcedureId,
|
||||
backtrace: Backtrace,
|
||||
#[snafu(backtrace)]
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -455,6 +517,11 @@ impl ErrorExt for Error {
|
||||
ColumnValuesNumberMismatch { .. }
|
||||
| ColumnTypeMismatch { .. }
|
||||
| InvalidSql { .. }
|
||||
| InvalidUrl { .. }
|
||||
| InvalidPath { .. }
|
||||
| InvalidConnection { .. }
|
||||
| UnsupportedBackendProtocol { .. }
|
||||
| BuildRegex { .. }
|
||||
| NotSupportSql { .. }
|
||||
| KeyColumnNotFound { .. }
|
||||
| IllegalPrimaryKeysDef { .. }
|
||||
@@ -480,11 +547,19 @@ impl ErrorExt for Error {
|
||||
| RenameTable { .. }
|
||||
| Catalog { .. }
|
||||
| MissingRequiredField { .. }
|
||||
| BuildParquetRecordBatchStream { .. }
|
||||
| InvalidSchema { .. }
|
||||
| ParseDataTypes { .. }
|
||||
| IncorrectInternalState { .. } => StatusCode::Internal,
|
||||
|
||||
InitBackend { .. } | WriteParquet { .. } | PollStream { .. } | WriteObject { .. } => {
|
||||
StatusCode::StorageUnavailable
|
||||
}
|
||||
BuildBackend { .. }
|
||||
| InitBackend { .. }
|
||||
| ReadParquet { .. }
|
||||
| WriteParquet { .. }
|
||||
| PollStream { .. }
|
||||
| ReadObject { .. }
|
||||
| WriteObject { .. }
|
||||
| ListObjects { .. } => StatusCode::StorageUnavailable,
|
||||
OpenLogStore { source } => source.status_code(),
|
||||
StartScriptManager { source } => source.status_code(),
|
||||
OpenStorageEngine { source } => source.status_code(),
|
||||
@@ -499,7 +574,7 @@ impl ErrorExt for Error {
|
||||
RecoverProcedure { source, .. } | SubmitProcedure { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
WaitProcedure { .. } | ProcedureExec { .. } => StatusCode::Internal,
|
||||
WaitProcedure { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -106,7 +106,7 @@ impl HeartbeatTask {
|
||||
let mut tx = Self::create_streams(&meta_client, running.clone()).await?;
|
||||
common_runtime::spawn_bg(async move {
|
||||
while running.load(Ordering::Acquire) {
|
||||
let region_num = match region_number(&catalog_manager_clone) {
|
||||
let region_num = match region_number(&catalog_manager_clone).await {
|
||||
Ok(region_num) => region_num as i64,
|
||||
Err(e) => {
|
||||
error!("failed to get region number, err: {e:?}");
|
||||
|
||||
@@ -177,7 +177,6 @@ impl Instance {
|
||||
};
|
||||
|
||||
let procedure_manager = create_procedure_manager(&opts.procedure).await?;
|
||||
// Recover procedures.
|
||||
if let Some(procedure_manager) = &procedure_manager {
|
||||
table_engine.register_procedure_loaders(&**procedure_manager);
|
||||
table_procedure::register_procedure_loaders(
|
||||
@@ -187,6 +186,7 @@ impl Instance {
|
||||
&**procedure_manager,
|
||||
);
|
||||
|
||||
// Recover procedures.
|
||||
procedure_manager
|
||||
.recover()
|
||||
.await
|
||||
@@ -423,7 +423,7 @@ pub(crate) async fn create_log_store(wal_config: &WalConfig) -> Result<RaftEngin
|
||||
Ok(logstore)
|
||||
}
|
||||
|
||||
async fn create_procedure_manager(
|
||||
pub(crate) async fn create_procedure_manager(
|
||||
procedure_config: &Option<ProcedureConfig>,
|
||||
) -> Result<Option<ProcedureManagerRef>> {
|
||||
let Some(procedure_config) = procedure_config else {
|
||||
|
||||
@@ -45,6 +45,7 @@ impl Instance {
|
||||
pub(crate) async fn execute_logical(&self, plan_bytes: Vec<u8>) -> Result<Output> {
|
||||
let logical_plan = DFLogicalSubstraitConvertor
|
||||
.decode(plan_bytes.as_slice(), self.catalog_manager.clone())
|
||||
.await
|
||||
.context(DecodeLogicalPlanSnafu)?;
|
||||
|
||||
self.query_engine
|
||||
@@ -74,6 +75,7 @@ impl Instance {
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog, schema, table_name)
|
||||
.await
|
||||
.context(error::CatalogSnafu)?
|
||||
.context(error::TableNotFoundSnafu { table_name })?;
|
||||
|
||||
@@ -96,6 +98,7 @@ impl Instance {
|
||||
DdlExpr::Alter(expr) => self.handle_alter(expr).await,
|
||||
DdlExpr::CreateDatabase(expr) => self.handle_create_database(expr, query_ctx).await,
|
||||
DdlExpr::DropTable(expr) => self.handle_drop_table(expr).await,
|
||||
DdlExpr::FlushTable(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -287,9 +290,9 @@ mod test {
|
||||
+---------------------+-------+-----+
|
||||
| ts | host | cpu |
|
||||
+---------------------+-------+-----+
|
||||
| 2022-12-30T07:09:00 | host1 | 1 |
|
||||
| 2022-12-30T07:09:00 | host1 | 1.0 |
|
||||
| 2022-12-30T07:09:01 | host2 | |
|
||||
| 2022-12-30T07:09:02 | host3 | 3 |
|
||||
| 2022-12-30T07:09:02 | host3 | 3.0 |
|
||||
+---------------------+-------+-----+";
|
||||
assert_eq!(recordbatches.pretty_print().unwrap(), expected);
|
||||
}
|
||||
@@ -325,7 +328,7 @@ mod test {
|
||||
+---------------------+-------+------+--------+
|
||||
| ts | host | cpu | memory |
|
||||
+---------------------+-------+------+--------+
|
||||
| 2022-12-28T04:17:05 | host1 | 66.6 | 1024 |
|
||||
| 2022-12-28T04:17:05 | host1 | 66.6 | 1024.0 |
|
||||
| 2022-12-28T04:17:06 | host2 | 88.8 | 333.3 |
|
||||
+---------------------+-------+------+--------+";
|
||||
let actual = recordbatch.pretty_print().unwrap();
|
||||
|
||||
@@ -24,15 +24,18 @@ use datatypes::schema::Schema;
|
||||
use futures::StreamExt;
|
||||
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
|
||||
use servers::error as server_error;
|
||||
use servers::promql::PromqlHandler;
|
||||
use servers::prom::PromHandler;
|
||||
use servers::query_handler::sql::SqlQueryHandler;
|
||||
use session::context::{QueryContext, QueryContextRef};
|
||||
use snafu::prelude::*;
|
||||
use sql::ast::ObjectName;
|
||||
use sql::statements::copy::CopyTable;
|
||||
use sql::statements::statement::Statement;
|
||||
use sql::statements::tql::Tql;
|
||||
use table::engine::TableReference;
|
||||
use table::requests::{CopyTableRequest, CreateDatabaseRequest, DropTableRequest};
|
||||
use table::requests::{
|
||||
CopyTableFromRequest, CopyTableRequest, CreateDatabaseRequest, DropTableRequest,
|
||||
};
|
||||
|
||||
use crate::error::{self, BumpTableIdSnafu, ExecuteSqlSnafu, Result, TableIdProviderNotFoundSnafu};
|
||||
use crate::instance::Instance;
|
||||
@@ -51,6 +54,7 @@ impl Instance {
|
||||
let logical_plan = self
|
||||
.query_engine
|
||||
.statement_to_plan(stmt, query_ctx)
|
||||
.await
|
||||
.context(ExecuteSqlSnafu)?;
|
||||
|
||||
self.query_engine
|
||||
@@ -183,22 +187,39 @@ impl Instance {
|
||||
|
||||
Ok(Output::RecordBatches(RecordBatches::empty()))
|
||||
}
|
||||
QueryStatement::Sql(Statement::Copy(copy_table)) => {
|
||||
let (catalog_name, schema_name, table_name) =
|
||||
table_idents_to_full_name(copy_table.table_name(), query_ctx.clone())?;
|
||||
let file_name = copy_table.file_name().to_string();
|
||||
QueryStatement::Sql(Statement::Copy(copy_table)) => match copy_table {
|
||||
CopyTable::To(copy_table) => {
|
||||
let (catalog_name, schema_name, table_name) =
|
||||
table_idents_to_full_name(copy_table.table_name(), query_ctx.clone())?;
|
||||
let file_name = copy_table.file_name().to_string();
|
||||
|
||||
let req = CopyTableRequest {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
table_name,
|
||||
file_name,
|
||||
};
|
||||
let req = CopyTableRequest {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
table_name,
|
||||
file_name,
|
||||
};
|
||||
|
||||
self.sql_handler
|
||||
.execute(SqlRequest::CopyTable(req), query_ctx)
|
||||
.await
|
||||
}
|
||||
self.sql_handler
|
||||
.execute(SqlRequest::CopyTable(req), query_ctx)
|
||||
.await
|
||||
}
|
||||
CopyTable::From(copy_table) => {
|
||||
let (catalog_name, schema_name, table_name) =
|
||||
table_idents_to_full_name(©_table.table_name, query_ctx.clone())?;
|
||||
let req = CopyTableFromRequest {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
table_name,
|
||||
connection: copy_table.connection,
|
||||
pattern: copy_table.pattern,
|
||||
from: copy_table.from,
|
||||
};
|
||||
self.sql_handler
|
||||
.execute(SqlRequest::CopyTableFrom(req), query_ctx)
|
||||
.await
|
||||
}
|
||||
},
|
||||
QueryStatement::Sql(Statement::Tql(tql)) => self.execute_tql(tql, query_ctx).await,
|
||||
}
|
||||
}
|
||||
@@ -216,6 +237,7 @@ impl Instance {
|
||||
let logical_plan = self
|
||||
.query_engine
|
||||
.statement_to_plan(stmt, query_ctx)
|
||||
.await
|
||||
.context(ExecuteSqlSnafu)?;
|
||||
|
||||
self.query_engine
|
||||
@@ -335,10 +357,15 @@ impl SqlQueryHandler for Instance {
|
||||
.await
|
||||
}
|
||||
|
||||
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
|
||||
async fn do_describe(
|
||||
&self,
|
||||
stmt: Statement,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Option<Schema>> {
|
||||
if let Statement::Query(_) = stmt {
|
||||
self.query_engine
|
||||
.describe(QueryStatement::Sql(stmt), query_ctx)
|
||||
.await
|
||||
.map(Some)
|
||||
.context(error::DescribeStatementSnafu)
|
||||
} else {
|
||||
@@ -355,7 +382,7 @@ impl SqlQueryHandler for Instance {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PromqlHandler for Instance {
|
||||
impl PromHandler for Instance {
|
||||
async fn do_query(&self, query: &PromQuery) -> server_error::Result<Output> {
|
||||
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);
|
||||
|
||||
|
||||
@@ -31,9 +31,11 @@ use table::metadata::TableId;
|
||||
use table::table::TableIdProvider;
|
||||
|
||||
use crate::datanode::DatanodeOptions;
|
||||
use crate::error::{CatalogSnafu, Result};
|
||||
use crate::error::{CatalogSnafu, RecoverProcedureSnafu, Result};
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::instance::{create_log_store, new_object_store, DefaultEngine, Instance};
|
||||
use crate::instance::{
|
||||
create_log_store, create_procedure_manager, new_object_store, DefaultEngine, Instance,
|
||||
};
|
||||
use crate::script::ScriptExecutor;
|
||||
use crate::sql::SqlHandler;
|
||||
|
||||
@@ -93,6 +95,16 @@ impl Instance {
|
||||
let script_executor =
|
||||
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
|
||||
|
||||
let procedure_manager = create_procedure_manager(&opts.procedure).await?;
|
||||
if let Some(procedure_manager) = &procedure_manager {
|
||||
table_engine.register_procedure_loaders(&**procedure_manager);
|
||||
// Recover procedures.
|
||||
procedure_manager
|
||||
.recover()
|
||||
.await
|
||||
.context(RecoverProcedureSnafu)?;
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
query_engine: query_engine.clone(),
|
||||
sql_handler: SqlHandler::new(
|
||||
@@ -100,7 +112,7 @@ impl Instance {
|
||||
catalog_manager.clone(),
|
||||
query_engine.clone(),
|
||||
table_engine,
|
||||
None,
|
||||
procedure_manager,
|
||||
),
|
||||
catalog_manager,
|
||||
script_executor,
|
||||
|
||||
@@ -33,6 +33,7 @@ use crate::instance::sql::table_idents_to_full_name;
|
||||
|
||||
mod alter;
|
||||
mod copy_table;
|
||||
mod copy_table_from;
|
||||
mod create;
|
||||
mod delete;
|
||||
mod drop_table;
|
||||
@@ -51,6 +52,7 @@ pub enum SqlRequest {
|
||||
Explain(Box<Explain>),
|
||||
Delete(Delete),
|
||||
CopyTable(CopyTableRequest),
|
||||
CopyTableFrom(CopyTableFromRequest),
|
||||
}
|
||||
|
||||
// Handler to execute SQL except query
|
||||
@@ -92,6 +94,7 @@ impl SqlHandler {
|
||||
SqlRequest::DropTable(req) => self.drop_table(req).await,
|
||||
SqlRequest::Delete(req) => self.delete(query_ctx.clone(), req).await,
|
||||
SqlRequest::CopyTable(req) => self.copy_table(req).await,
|
||||
SqlRequest::CopyTableFrom(req) => self.copy_table_from(req).await,
|
||||
SqlRequest::ShowDatabases(req) => {
|
||||
show_databases(req, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
|
||||
}
|
||||
@@ -105,6 +108,7 @@ impl SqlHandler {
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(&catalog, &schema, &table)
|
||||
.await
|
||||
.context(error::CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: req.name().to_string(),
|
||||
@@ -146,6 +150,7 @@ mod tests {
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use common_time::timestamp::Timestamp;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
|
||||
@@ -166,7 +171,6 @@ mod tests {
|
||||
use table::error::Result as TableResult;
|
||||
use table::metadata::TableInfoRef;
|
||||
use table::Table;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
@@ -217,7 +221,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_statement_to_request() {
|
||||
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
|
||||
let dir = create_temp_dir("setup_test_engine_and_table");
|
||||
let store_dir = dir.path().to_string_lossy();
|
||||
let accessor = Builder::default().root(&store_dir).build().unwrap();
|
||||
let object_store = ObjectStore::new(accessor).finish();
|
||||
@@ -244,7 +248,7 @@ mod tests {
|
||||
.unwrap(),
|
||||
);
|
||||
catalog_list.start().await.unwrap();
|
||||
catalog_list
|
||||
assert!(catalog_list
|
||||
.register_table(RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
@@ -253,7 +257,7 @@ mod tests {
|
||||
table: Arc::new(DemoTable),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
.unwrap());
|
||||
|
||||
let factory = QueryEngineFactory::new(catalog_list.clone());
|
||||
let query_engine = factory.query_engine();
|
||||
|
||||
@@ -52,7 +52,10 @@ impl SqlHandler {
|
||||
.context(error::TableScanExecSnafu)?;
|
||||
let stream = Box::pin(DfRecordBatchStreamAdapter::new(stream));
|
||||
|
||||
let accessor = Builder::default().build().unwrap();
|
||||
let accessor = Builder::default()
|
||||
.root("/")
|
||||
.build()
|
||||
.context(error::BuildBackendSnafu)?;
|
||||
let object_store = ObjectStore::new(accessor).finish();
|
||||
|
||||
let mut parquet_writer = ParquetWriter::new(req.file_name, stream, object_store);
|
||||
|
||||
445
src/datanode/src/sql/copy_table_from.rs
Normal file
445
src/datanode/src/sql/copy_table_from.rs
Normal file
@@ -0,0 +1,445 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_compat::CompatExt;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::error::DataTypesSnafu;
|
||||
use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use futures::future;
|
||||
use futures_util::TryStreamExt;
|
||||
use object_store::services::{Fs, S3};
|
||||
use object_store::{Object, ObjectStore, ObjectStoreBuilder};
|
||||
use regex::Regex;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use table::engine::TableReference;
|
||||
use table::requests::{CopyTableFromRequest, InsertRequest};
|
||||
use tokio::io::BufReader;
|
||||
use url::{ParseError, Url};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::sql::SqlHandler;
|
||||
|
||||
const S3_SCHEMA: &str = "S3";
|
||||
const ENDPOINT_URL: &str = "ENDPOINT_URL";
|
||||
const ACCESS_KEY_ID: &str = "ACCESS_KEY_ID";
|
||||
const SECRET_ACCESS_KEY: &str = "SECRET_ACCESS_KEY";
|
||||
const SESSION_TOKEN: &str = "SESSION_TOKEN";
|
||||
const REGION: &str = "REGION";
|
||||
const ENABLE_VIRTUAL_HOST_STYLE: &str = "ENABLE_VIRTUAL_HOST_STYLE";
|
||||
|
||||
impl SqlHandler {
|
||||
pub(crate) async fn copy_table_from(&self, req: CopyTableFromRequest) -> Result<Output> {
|
||||
let table_ref = TableReference {
|
||||
catalog: &req.catalog_name,
|
||||
schema: &req.schema_name,
|
||||
table: &req.table_name,
|
||||
};
|
||||
let table = self.get_table(&table_ref)?;
|
||||
|
||||
let datasource = DataSource::new(&req.from, req.pattern, req.connection)?;
|
||||
|
||||
let objects = datasource.list().await?;
|
||||
|
||||
let mut buf: Vec<RecordBatch> = Vec::new();
|
||||
|
||||
for obj in objects.iter() {
|
||||
let reader = obj.reader().await.context(error::ReadObjectSnafu {
|
||||
path: &obj.path().to_string(),
|
||||
})?;
|
||||
|
||||
let buf_reader = BufReader::new(reader.compat());
|
||||
|
||||
let builder = ParquetRecordBatchStreamBuilder::new(buf_reader)
|
||||
.await
|
||||
.context(error::ReadParquetSnafu)?;
|
||||
|
||||
ensure!(
|
||||
builder.schema() == table.schema().arrow_schema(),
|
||||
error::InvalidSchemaSnafu {
|
||||
table_schema: table.schema().arrow_schema().to_string(),
|
||||
file_schema: (*(builder.schema())).to_string()
|
||||
}
|
||||
);
|
||||
|
||||
let stream = builder
|
||||
.build()
|
||||
.context(error::BuildParquetRecordBatchStreamSnafu)?;
|
||||
|
||||
let chunk = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.context(error::ReadParquetSnafu)?;
|
||||
|
||||
buf.extend(chunk.into_iter());
|
||||
}
|
||||
|
||||
let fields = table
|
||||
.schema()
|
||||
.arrow_schema()
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|f| f.name().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Vec<Columns>
|
||||
let column_chunks = buf
|
||||
.into_iter()
|
||||
.map(|c| Helper::try_into_vectors(c.columns()).context(DataTypesSnafu))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut futs = Vec::with_capacity(column_chunks.len());
|
||||
|
||||
for column_chunk in column_chunks.into_iter() {
|
||||
let column_chunk = column_chunk.context(error::ParseDataTypesSnafu)?;
|
||||
let columns_values = fields
|
||||
.iter()
|
||||
.cloned()
|
||||
.zip(column_chunk.into_iter())
|
||||
.collect::<HashMap<String, VectorRef>>();
|
||||
|
||||
futs.push(table.insert(InsertRequest {
|
||||
catalog_name: req.catalog_name.to_string(),
|
||||
schema_name: req.schema_name.to_string(),
|
||||
table_name: req.table_name.to_string(),
|
||||
columns_values,
|
||||
//TODO: support multi-regions
|
||||
region_number: 0,
|
||||
}))
|
||||
}
|
||||
|
||||
let result = futures::future::try_join_all(futs)
|
||||
.await
|
||||
.context(error::InsertSnafu {
|
||||
table_name: req.table_name.to_string(),
|
||||
})?;
|
||||
|
||||
Ok(Output::AffectedRows(result.iter().sum()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
enum Source {
|
||||
Filename(String),
|
||||
Dir,
|
||||
}
|
||||
|
||||
struct DataSource {
|
||||
object_store: ObjectStore,
|
||||
source: Source,
|
||||
path: String,
|
||||
regex: Option<Regex>,
|
||||
}
|
||||
|
||||
impl DataSource {
|
||||
fn from_path(url: &str, regex: Option<Regex>) -> Result<DataSource> {
|
||||
let result = if url.ends_with('/') {
|
||||
Url::from_directory_path(url)
|
||||
} else {
|
||||
Url::from_file_path(url)
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(url) => {
|
||||
let path = url.path();
|
||||
|
||||
let (path, filename) = DataSource::find_dir_and_filename(path);
|
||||
|
||||
let source = if let Some(filename) = filename {
|
||||
Source::Filename(filename)
|
||||
} else {
|
||||
Source::Dir
|
||||
};
|
||||
|
||||
let accessor = Fs::default()
|
||||
.root(&path)
|
||||
.build()
|
||||
.context(error::BuildBackendSnafu)?;
|
||||
|
||||
Ok(DataSource {
|
||||
object_store: ObjectStore::new(accessor).finish(),
|
||||
source,
|
||||
path,
|
||||
regex,
|
||||
})
|
||||
}
|
||||
Err(()) => error::InvalidPathSnafu {
|
||||
path: url.to_string(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_s3_backend(
|
||||
host: Option<&str>,
|
||||
path: &str,
|
||||
connection: HashMap<String, String>,
|
||||
) -> Result<ObjectStore> {
|
||||
let mut builder = S3::default();
|
||||
|
||||
builder.root(path);
|
||||
|
||||
if let Some(bucket) = host {
|
||||
builder.bucket(bucket);
|
||||
}
|
||||
|
||||
if let Some(endpoint) = connection.get(ENDPOINT_URL) {
|
||||
builder.endpoint(endpoint);
|
||||
}
|
||||
|
||||
if let Some(region) = connection.get(REGION) {
|
||||
builder.region(region);
|
||||
}
|
||||
|
||||
if let Some(key_id) = connection.get(ACCESS_KEY_ID) {
|
||||
builder.access_key_id(key_id);
|
||||
}
|
||||
|
||||
if let Some(key) = connection.get(SECRET_ACCESS_KEY) {
|
||||
builder.secret_access_key(key);
|
||||
}
|
||||
|
||||
if let Some(session_token) = connection.get(SESSION_TOKEN) {
|
||||
builder.security_token(session_token);
|
||||
}
|
||||
|
||||
if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) {
|
||||
let enable = enable_str.as_str().parse::<bool>().map_err(|e| {
|
||||
error::InvalidConnectionSnafu {
|
||||
msg: format!(
|
||||
"failed to parse the option {}={}, {}",
|
||||
ENABLE_VIRTUAL_HOST_STYLE, enable_str, e
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
if enable {
|
||||
builder.enable_virtual_host_style();
|
||||
}
|
||||
}
|
||||
|
||||
let accessor = builder.build().context(error::BuildBackendSnafu)?;
|
||||
|
||||
Ok(ObjectStore::new(accessor).finish())
|
||||
}
|
||||
|
||||
fn from_url(
|
||||
url: Url,
|
||||
regex: Option<Regex>,
|
||||
connection: HashMap<String, String>,
|
||||
) -> Result<DataSource> {
|
||||
let host = url.host_str();
|
||||
|
||||
let path = url.path();
|
||||
|
||||
let schema = url.scheme();
|
||||
|
||||
let (dir, filename) = DataSource::find_dir_and_filename(path);
|
||||
|
||||
let source = if let Some(filename) = filename {
|
||||
Source::Filename(filename)
|
||||
} else {
|
||||
Source::Dir
|
||||
};
|
||||
|
||||
let object_store = match schema.to_uppercase().as_str() {
|
||||
S3_SCHEMA => DataSource::build_s3_backend(host, &dir, connection)?,
|
||||
_ => {
|
||||
return error::UnsupportedBackendProtocolSnafu {
|
||||
protocol: schema.to_string(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(DataSource {
|
||||
object_store,
|
||||
source,
|
||||
path: dir,
|
||||
regex,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
url: &str,
|
||||
pattern: Option<String>,
|
||||
connection: HashMap<String, String>,
|
||||
) -> Result<DataSource> {
|
||||
let regex = if let Some(pattern) = pattern {
|
||||
let regex = Regex::new(&pattern).context(error::BuildRegexSnafu)?;
|
||||
Some(regex)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let result = Url::parse(url);
|
||||
|
||||
match result {
|
||||
Ok(url) => DataSource::from_url(url, regex, connection),
|
||||
Err(err) => {
|
||||
if ParseError::RelativeUrlWithoutBase == err {
|
||||
DataSource::from_path(url, regex)
|
||||
} else {
|
||||
Err(error::Error::InvalidUrl {
|
||||
url: url.to_string(),
|
||||
source: err,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<Object>> {
|
||||
match &self.source {
|
||||
Source::Dir => {
|
||||
let streamer = self
|
||||
.object_store
|
||||
.object("/")
|
||||
.list()
|
||||
.await
|
||||
.context(error::ListObjectsSnafu { path: &self.path })?;
|
||||
streamer
|
||||
.try_filter(|f| {
|
||||
let res = if let Some(regex) = &self.regex {
|
||||
regex.is_match(f.name())
|
||||
} else {
|
||||
true
|
||||
};
|
||||
future::ready(res)
|
||||
})
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.context(error::ListObjectsSnafu { path: &self.path })
|
||||
}
|
||||
Source::Filename(filename) => {
|
||||
let obj = self.object_store.object(filename);
|
||||
|
||||
Ok(vec![obj])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn find_dir_and_filename(path: &str) -> (String, Option<String>) {
|
||||
if path.is_empty() {
|
||||
("/".to_string(), None)
|
||||
} else if path.ends_with('/') {
|
||||
(path.to_string(), None)
|
||||
} else if let Some(idx) = path.rfind('/') {
|
||||
(
|
||||
path[..idx + 1].to_string(),
|
||||
Some(path[idx + 1..].to_string()),
|
||||
)
|
||||
} else {
|
||||
("/".to_string(), Some(path.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use url::Url;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_parse_uri() {
|
||||
struct Test<'a> {
|
||||
uri: &'a str,
|
||||
expected_path: &'a str,
|
||||
expected_schema: &'a str,
|
||||
}
|
||||
|
||||
let tests = [
|
||||
Test {
|
||||
uri: "s3://bucket/to/path/",
|
||||
expected_path: "/to/path/",
|
||||
expected_schema: "s3",
|
||||
},
|
||||
Test {
|
||||
uri: "fs:///to/path/",
|
||||
expected_path: "/to/path/",
|
||||
expected_schema: "fs",
|
||||
},
|
||||
Test {
|
||||
uri: "fs:///to/path/file",
|
||||
expected_path: "/to/path/file",
|
||||
expected_schema: "fs",
|
||||
},
|
||||
];
|
||||
for test in tests {
|
||||
let parsed_uri = Url::parse(test.uri).unwrap();
|
||||
assert_eq!(parsed_uri.path(), test.expected_path);
|
||||
assert_eq!(parsed_uri.scheme(), test.expected_schema);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_path_and_dir() {
|
||||
let parsed = Url::from_file_path("/to/path/file").unwrap();
|
||||
assert_eq!(parsed.path(), "/to/path/file");
|
||||
|
||||
let parsed = Url::from_directory_path("/to/path/").unwrap();
|
||||
assert_eq!(parsed.path(), "/to/path/");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_dir_and_filename() {
|
||||
struct Test<'a> {
|
||||
path: &'a str,
|
||||
expected_dir: &'a str,
|
||||
expected_filename: Option<String>,
|
||||
}
|
||||
|
||||
let tests = [
|
||||
Test {
|
||||
path: "to/path/",
|
||||
expected_dir: "to/path/",
|
||||
expected_filename: None,
|
||||
},
|
||||
Test {
|
||||
path: "to/path/filename",
|
||||
expected_dir: "to/path/",
|
||||
expected_filename: Some("filename".into()),
|
||||
},
|
||||
Test {
|
||||
path: "/to/path/filename",
|
||||
expected_dir: "/to/path/",
|
||||
expected_filename: Some("filename".into()),
|
||||
},
|
||||
Test {
|
||||
path: "/",
|
||||
expected_dir: "/",
|
||||
expected_filename: None,
|
||||
},
|
||||
Test {
|
||||
path: "filename",
|
||||
expected_dir: "/",
|
||||
expected_filename: Some("filename".into()),
|
||||
},
|
||||
Test {
|
||||
path: "",
|
||||
expected_dir: "/",
|
||||
expected_filename: None,
|
||||
},
|
||||
];
|
||||
|
||||
for test in tests {
|
||||
let (path, filename) = DataSource::find_dir_and_filename(test.path);
|
||||
assert_eq!(test.expected_dir, path);
|
||||
assert_eq!(test.expected_filename, filename)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use catalog::{RegisterSchemaRequest, RegisterTableRequest};
|
||||
use common_procedure::{ProcedureManagerRef, ProcedureState, ProcedureWithId};
|
||||
use common_procedure::{watcher, ProcedureManagerRef, ProcedureWithId};
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing::{error, info};
|
||||
use datatypes::schema::RawSchema;
|
||||
@@ -33,8 +33,8 @@ use table_procedure::CreateTableProcedure;
|
||||
use crate::error::{
|
||||
self, CatalogNotFoundSnafu, CatalogSnafu, ConstraintNotSupportedSnafu, CreateTableSnafu,
|
||||
IllegalPrimaryKeysDefSnafu, InsertSystemCatalogSnafu, KeyColumnNotFoundSnafu,
|
||||
ProcedureExecSnafu, RegisterSchemaSnafu, Result, SchemaExistsSnafu, SchemaNotFoundSnafu,
|
||||
SubmitProcedureSnafu, UnrecognizedTableOptionSnafu, WaitProcedureSnafu,
|
||||
RegisterSchemaSnafu, Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SubmitProcedureSnafu,
|
||||
UnrecognizedTableOptionSnafu, WaitProcedureSnafu,
|
||||
};
|
||||
use crate::sql::SqlHandler;
|
||||
|
||||
@@ -152,21 +152,13 @@ impl SqlHandler {
|
||||
let mut watcher = procedure_manager
|
||||
.submit(procedure_with_id)
|
||||
.await
|
||||
.context(SubmitProcedureSnafu)?;
|
||||
.context(SubmitProcedureSnafu { procedure_id })?;
|
||||
|
||||
// TODO(yingwen): Wrap this into a function and add error to ProcedureState::Failed.
|
||||
loop {
|
||||
watcher.changed().await.context(WaitProcedureSnafu)?;
|
||||
match *watcher.borrow() {
|
||||
ProcedureState::Running => (),
|
||||
ProcedureState::Done => {
|
||||
return Ok(Output::AffectedRows(0));
|
||||
}
|
||||
ProcedureState::Failed => {
|
||||
return ProcedureExecSnafu { procedure_id }.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
watcher::wait(&mut watcher)
|
||||
.await
|
||||
.context(WaitProcedureSnafu { procedure_id })?;
|
||||
|
||||
Ok(Output::AffectedRows(0))
|
||||
}
|
||||
|
||||
/// Converts [CreateTable] to [SqlRequest::CreateTable].
|
||||
|
||||
@@ -15,6 +15,7 @@ use std::collections::HashMap;
|
||||
use std::pin::Pin;
|
||||
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatch;
|
||||
use datafusion_expr::type_coercion::binary::coerce_types;
|
||||
@@ -239,6 +240,7 @@ impl SqlHandler {
|
||||
QueryStatement::Sql(Statement::Query(Box::new(query))),
|
||||
query_ctx.clone(),
|
||||
)
|
||||
.await
|
||||
.context(ExecuteSqlSnafu)?;
|
||||
|
||||
let output = self
|
||||
@@ -284,9 +286,10 @@ impl SqlHandler {
|
||||
|
||||
let table = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: table_name.clone(),
|
||||
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
|
||||
})?;
|
||||
|
||||
if stmt.is_insert_select() {
|
||||
|
||||
@@ -236,7 +236,7 @@ async fn test_execute_insert_by_select() {
|
||||
+-------+------+--------+---------------------+
|
||||
| host | cpu | memory | ts |
|
||||
+-------+------+--------+---------------------+
|
||||
| host1 | 66.6 | 1024 | 2022-06-15T07:02:37 |
|
||||
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
|
||||
| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 |
|
||||
+-------+------+--------+---------------------+"
|
||||
.to_string();
|
||||
@@ -457,8 +457,8 @@ async fn test_rename_table() {
|
||||
+-------+-----+--------+---------------------+
|
||||
| host | cpu | memory | ts |
|
||||
+-------+-----+--------+---------------------+
|
||||
| host1 | 1.1 | 100 | 1970-01-01T00:00:01 |
|
||||
| host2 | 2.2 | 200 | 1970-01-01T00:00:02 |
|
||||
| host1 | 1.1 | 100.0 | 1970-01-01T00:00:01 |
|
||||
| host2 | 2.2 | 200.0 | 1970-01-01T00:00:02 |
|
||||
+-------+-----+--------+---------------------+\
|
||||
"
|
||||
.to_string();
|
||||
@@ -559,9 +559,9 @@ async fn test_alter_table() {
|
||||
+-------+-----+--------+---------------------+--------+
|
||||
| host | cpu | memory | ts | my_tag |
|
||||
+-------+-----+--------+---------------------+--------+
|
||||
| host1 | 1.1 | 100 | 1970-01-01T00:00:01 | |
|
||||
| host2 | 2.2 | 200 | 1970-01-01T00:00:02 | hello |
|
||||
| host3 | 3.3 | 300 | 1970-01-01T00:00:03 | |
|
||||
| host1 | 1.1 | 100.0 | 1970-01-01T00:00:01 | |
|
||||
| host2 | 2.2 | 200.0 | 1970-01-01T00:00:02 | hello |
|
||||
| host3 | 3.3 | 300.0 | 1970-01-01T00:00:03 | |
|
||||
+-------+-----+--------+---------------------+--------+\
|
||||
"
|
||||
.to_string();
|
||||
@@ -594,14 +594,14 @@ async fn test_alter_table() {
|
||||
|
||||
let output = execute_sql(&instance, "select * from demo order by ts").await;
|
||||
let expected = "\
|
||||
+-------+-----+---------------------+--------+
|
||||
| host | cpu | ts | my_tag |
|
||||
+-------+-----+---------------------+--------+
|
||||
| host1 | 1.1 | 1970-01-01T00:00:01 | |
|
||||
| host2 | 2.2 | 1970-01-01T00:00:02 | hello |
|
||||
| host3 | 3.3 | 1970-01-01T00:00:03 | |
|
||||
| host4 | 400 | 1970-01-01T00:00:04 | world |
|
||||
+-------+-----+---------------------+--------+\
|
||||
+-------+-------+---------------------+--------+
|
||||
| host | cpu | ts | my_tag |
|
||||
+-------+-------+---------------------+--------+
|
||||
| host1 | 1.1 | 1970-01-01T00:00:01 | |
|
||||
| host2 | 2.2 | 1970-01-01T00:00:02 | hello |
|
||||
| host3 | 3.3 | 1970-01-01T00:00:03 | |
|
||||
| host4 | 400.0 | 1970-01-01T00:00:04 | world |
|
||||
+-------+-------+---------------------+--------+\
|
||||
"
|
||||
.to_string();
|
||||
check_output_stream(output, expected).await;
|
||||
@@ -757,14 +757,165 @@ async fn test_delete() {
|
||||
+-------+---------------------+------+--------+
|
||||
| host | ts | cpu | memory |
|
||||
+-------+---------------------+------+--------+
|
||||
| host2 | 2022-06-15T07:02:38 | 77.7 | 2048 |
|
||||
| host3 | 2022-06-15T07:02:39 | 88.8 | 3072 |
|
||||
| host2 | 2022-06-15T07:02:38 | 77.7 | 2048.0 |
|
||||
| host3 | 2022-06-15T07:02:39 | 88.8 | 3072.0 |
|
||||
+-------+---------------------+------+--------+\
|
||||
"
|
||||
.to_string();
|
||||
check_output_stream(output, expect).await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_copy_to() {
|
||||
let instance = setup_test_instance("test_execute_copy_to").await;
|
||||
|
||||
// setups
|
||||
execute_sql(
|
||||
&instance,
|
||||
"create table demo(host string, cpu double, memory double, ts timestamp time index);",
|
||||
)
|
||||
.await;
|
||||
|
||||
let output = execute_sql(
|
||||
&instance,
|
||||
r#"insert into demo(host, cpu, memory, ts) values
|
||||
('host1', 66.6, 1024, 1655276557000),
|
||||
('host2', 88.8, 333.3, 1655276558000)
|
||||
"#,
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(output, Output::AffectedRows(2)));
|
||||
|
||||
// exports
|
||||
let data_dir = instance.data_tmp_dir().path();
|
||||
|
||||
let copy_to_stmt = format!("Copy demo TO '{}/export/demo.parquet'", data_dir.display());
|
||||
|
||||
let output = execute_sql(&instance, ©_to_stmt).await;
|
||||
assert!(matches!(output, Output::AffectedRows(2)));
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_copy_from() {
|
||||
let instance = setup_test_instance("test_execute_copy_from").await;
|
||||
|
||||
// setups
|
||||
execute_sql(
|
||||
&instance,
|
||||
"create table demo(host string, cpu double, memory double, ts timestamp time index);",
|
||||
)
|
||||
.await;
|
||||
|
||||
let output = execute_sql(
|
||||
&instance,
|
||||
r#"insert into demo(host, cpu, memory, ts) values
|
||||
('host1', 66.6, 1024, 1655276557000),
|
||||
('host2', 88.8, 333.3, 1655276558000)
|
||||
"#,
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(output, Output::AffectedRows(2)));
|
||||
|
||||
// export
|
||||
let data_dir = instance.data_tmp_dir().path();
|
||||
|
||||
let copy_to_stmt = format!("Copy demo TO '{}/export/demo.parquet'", data_dir.display());
|
||||
|
||||
let output = execute_sql(&instance, ©_to_stmt).await;
|
||||
assert!(matches!(output, Output::AffectedRows(2)));
|
||||
|
||||
struct Test<'a> {
|
||||
sql: &'a str,
|
||||
table_name: &'a str,
|
||||
}
|
||||
let tests = [
|
||||
Test {
|
||||
sql: &format!(
|
||||
"Copy with_filename FROM '{}/export/demo.parquet_1_2'",
|
||||
data_dir.display()
|
||||
),
|
||||
table_name: "with_filename",
|
||||
},
|
||||
Test {
|
||||
sql: &format!("Copy with_path FROM '{}/export/'", data_dir.display()),
|
||||
table_name: "with_path",
|
||||
},
|
||||
Test {
|
||||
sql: &format!(
|
||||
"Copy with_pattern FROM '{}/export/' WITH (PATTERN = 'demo.*')",
|
||||
data_dir.display()
|
||||
),
|
||||
table_name: "with_pattern",
|
||||
},
|
||||
];
|
||||
|
||||
for test in tests {
|
||||
// import
|
||||
execute_sql(
|
||||
&instance,
|
||||
&format!(
|
||||
"create table {}(host string, cpu double, memory double, ts timestamp time index);",
|
||||
test.table_name
|
||||
),
|
||||
)
|
||||
.await;
|
||||
|
||||
let output = execute_sql(&instance, test.sql).await;
|
||||
assert!(matches!(output, Output::AffectedRows(2)));
|
||||
|
||||
let output = execute_sql(
|
||||
&instance,
|
||||
&format!("select * from {} order by ts", test.table_name),
|
||||
)
|
||||
.await;
|
||||
let expected = "\
|
||||
+-------+------+--------+---------------------+
|
||||
| host | cpu | memory | ts |
|
||||
+-------+------+--------+---------------------+
|
||||
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
|
||||
| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 |
|
||||
+-------+------+--------+---------------------+"
|
||||
.to_string();
|
||||
check_output_stream(output, expected).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_create_by_procedure() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let instance = MockInstance::with_procedure_enabled("create_by_procedure").await;
|
||||
|
||||
let output = execute_sql(
|
||||
&instance,
|
||||
r#"create table test_table(
|
||||
host string,
|
||||
ts timestamp,
|
||||
cpu double default 0,
|
||||
memory double,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(host)
|
||||
) engine=mito with(regions=1);"#,
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(output, Output::AffectedRows(0)));
|
||||
|
||||
// Create if not exists
|
||||
let output = execute_sql(
|
||||
&instance,
|
||||
r#"create table if not exists test_table(
|
||||
host string,
|
||||
ts timestamp,
|
||||
cpu double default 0,
|
||||
memory double,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(host)
|
||||
) engine=mito with(regions=1);"#,
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(output, Output::AffectedRows(0)));
|
||||
}
|
||||
|
||||
async fn execute_sql(instance: &MockInstance, sql: &str) -> Output {
|
||||
execute_sql_in_db(instance, sql, DEFAULT_SCHEMA_NAME).await
|
||||
}
|
||||
|
||||
@@ -106,17 +106,17 @@ async fn sql_insert_tql_query_ceil() {
|
||||
"+---------------------+-----------+--------------+-------+\
|
||||
\n| ts | ceil(cpu) | ceil(memory) | host |\
|
||||
\n+---------------------+-----------+--------------+-------+\
|
||||
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
|
||||
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
|
||||
\n| 1970-01-01T00:00:20 | 100 | 20480 | host1 |\
|
||||
\n| 1970-01-01T00:00:30 | 32 | 8192 | host1 |\
|
||||
\n| 1970-01-01T00:00:40 | 96 | 334 | host1 |\
|
||||
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
|
||||
\n| 1970-01-01T00:01:00 | 12424 | 1334 | host1 |\
|
||||
\n| 1970-01-01T00:01:10 | 12424 | 1334 | host1 |\
|
||||
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
|
||||
\n| 1970-01-01T00:01:30 | 0 | 2334 | host1 |\
|
||||
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
|
||||
\n| 1970-01-01T00:00:00 | 67.0 | 1024.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:10 | 100.0 | 20480.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:20 | 100.0 | 20480.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:30 | 32.0 | 8192.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:40 | 96.0 | 334.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:50 | 12424.0 | 1334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:00 | 12424.0 | 1334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:10 | 12424.0 | 1334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:20 | 0.0 | 2334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:30 | 0.0 | 2334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:40 | 49.0 | 3334.0 | host1 |\
|
||||
\n+---------------------+-----------+--------------+-------+",
|
||||
)
|
||||
.await;
|
||||
@@ -154,12 +154,12 @@ async fn sql_insert_promql_query_ceil() {
|
||||
"+---------------------+-----------+--------------+-------+\
|
||||
\n| ts | ceil(cpu) | ceil(memory) | host |\
|
||||
\n+---------------------+-----------+--------------+-------+\
|
||||
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
|
||||
\n| 1970-01-01T00:00:05 | 67 | 4096 | host1 |\
|
||||
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
|
||||
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
|
||||
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
|
||||
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
|
||||
\n| 1970-01-01T00:00:00 | 67.0 | 1024.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:05 | 67.0 | 4096.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:10 | 100.0 | 20480.0 | host1 |\
|
||||
\n| 1970-01-01T00:00:50 | 12424.0 | 1334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:20 | 0.0 | 2334.0 | host1 |\
|
||||
\n| 1970-01-01T00:01:40 | 49.0 | 3334.0 | host1 |\
|
||||
\n+---------------------+-----------+--------------+-------+",
|
||||
)
|
||||
.await;
|
||||
@@ -214,8 +214,8 @@ async fn aggregators_simple_sum() {
|
||||
"+------------+---------------------+--------------------------+\
|
||||
\n| group | ts | SUM(http_requests.value) |\
|
||||
\n+------------+---------------------+--------------------------+\
|
||||
\n| production | 1970-01-01T00:00:00 | 300 |\
|
||||
\n| canary | 1970-01-01T00:00:00 | 700 |\
|
||||
\n| production | 1970-01-01T00:00:00 | 300.0 |\
|
||||
\n| canary | 1970-01-01T00:00:00 | 700.0 |\
|
||||
\n+------------+---------------------+--------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -238,8 +238,8 @@ async fn aggregators_simple_avg() {
|
||||
"+------------+---------------------+--------------------------+\
|
||||
\n| group | ts | AVG(http_requests.value) |\
|
||||
\n+------------+---------------------+--------------------------+\
|
||||
\n| production | 1970-01-01T00:00:00 | 150 |\
|
||||
\n| canary | 1970-01-01T00:00:00 | 350 |\
|
||||
\n| production | 1970-01-01T00:00:00 | 150.0 |\
|
||||
\n| canary | 1970-01-01T00:00:00 | 350.0 |\
|
||||
\n+------------+---------------------+--------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -286,8 +286,8 @@ async fn aggregators_simple_without() {
|
||||
"+------------+------------+---------------------+--------------------------+\
|
||||
\n| group | job | ts | SUM(http_requests.value) |\
|
||||
\n+------------+------------+---------------------+--------------------------+\
|
||||
\n| production | api-server | 1970-01-01T00:00:00 | 300 |\
|
||||
\n| canary | api-server | 1970-01-01T00:00:00 | 700 |\
|
||||
\n| production | api-server | 1970-01-01T00:00:00 | 300.0 |\
|
||||
\n| canary | api-server | 1970-01-01T00:00:00 | 700.0 |\
|
||||
\n+------------+------------+---------------------+--------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -309,7 +309,7 @@ async fn aggregators_empty_by() {
|
||||
"+---------------------+--------------------------+\
|
||||
\n| ts | SUM(http_requests.value) |\
|
||||
\n+---------------------+--------------------------+\
|
||||
\n| 1970-01-01T00:00:00 | 1000 |\
|
||||
\n| 1970-01-01T00:00:00 | 1000.0 |\
|
||||
\n+---------------------+--------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -331,7 +331,7 @@ async fn aggregators_no_by_without() {
|
||||
"+---------------------+--------------------------+\
|
||||
\n| ts | SUM(http_requests.value) |\
|
||||
\n+---------------------+--------------------------+\
|
||||
\n| 1970-01-01T00:00:00 | 1000 |\
|
||||
\n| 1970-01-01T00:00:00 | 1000.0 |\
|
||||
\n+---------------------+--------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -354,8 +354,8 @@ async fn aggregators_empty_without() {
|
||||
"+------------+----------+------------+---------------------+--------------------------+\
|
||||
\n| group | instance | job | ts | SUM(http_requests.value) |\
|
||||
\n+------------+----------+------------+---------------------+--------------------------+\
|
||||
\n| production | 0 | api-server | 1970-01-01T00:00:00 | 100 |\
|
||||
\n| production | 1 | api-server | 1970-01-01T00:00:00 | 200 |\
|
||||
\n| production | 0 | api-server | 1970-01-01T00:00:00 | 100.0 |\
|
||||
\n| production | 1 | api-server | 1970-01-01T00:00:00 | 200.0 |\
|
||||
\n+------------+----------+------------+---------------------+--------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -378,8 +378,8 @@ async fn aggregators_complex_combined_aggrs() {
|
||||
"+------------+-----------------------------------------------------------------------------------------------------------+\
|
||||
\n| job | SUM(http_requests.value) + MIN(http_requests.value) + MAX(http_requests.value) + AVG(http_requests.value) |\
|
||||
\n+------------+-----------------------------------------------------------------------------------------------------------+\
|
||||
\n| api-server | 1750 |\
|
||||
\n| app-server | 4550 |\
|
||||
\n| api-server | 1750.0 |\
|
||||
\n| app-server | 4550.0 |\
|
||||
\n+------------+-----------------------------------------------------------------------------------------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -399,8 +399,8 @@ async fn two_aggregators_combined_aggrs() {
|
||||
"+------------+-----------------------------------------------------+\
|
||||
\n| job | SUM(http_requests.value) + MIN(http_requests.value) |\
|
||||
\n+------------+-----------------------------------------------------+\
|
||||
\n| api-server | 1100 |\
|
||||
\n| app-server | 3100 |\
|
||||
\n| api-server | 1100.0 |\
|
||||
\n| app-server | 3100.0 |\
|
||||
\n+------------+-----------------------------------------------------+",
|
||||
)
|
||||
.await;
|
||||
@@ -444,14 +444,14 @@ async fn binary_op_plain_columns() {
|
||||
"+------------+----------+------------+---------------------+-------------------------------------------+\
|
||||
\n| job | instance | group | ts | http_requests.value - http_requests.value |\
|
||||
\n+------------+----------+------------+---------------------+-------------------------------------------+\
|
||||
\n| api-server | 0 | canary | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| api-server | 0 | production | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| api-server | 1 | canary | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| api-server | 1 | production | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| app-server | 0 | canary | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| app-server | 0 | production | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| app-server | 1 | canary | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| app-server | 1 | production | 1970-01-01T00:00:00 | 0 |\
|
||||
\n| api-server | 0 | canary | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| api-server | 0 | production | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| api-server | 1 | canary | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| api-server | 1 | production | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| app-server | 0 | canary | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| app-server | 0 | production | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| app-server | 1 | canary | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n| app-server | 1 | production | 1970-01-01T00:00:00 | 0.0 |\
|
||||
\n+------------+----------+------------+---------------------+-------------------------------------------+",
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::sync::Arc;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
use common_query::Output;
|
||||
use common_recordbatch::util;
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, RawSchema};
|
||||
use mito::config::EngineConfig;
|
||||
@@ -26,9 +27,8 @@ use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
use table::requests::{CreateTableRequest, TableOptions};
|
||||
use tempdir::TempDir;
|
||||
|
||||
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, WalConfig};
|
||||
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, WalConfig};
|
||||
use crate::error::{CreateTableSnafu, Result};
|
||||
use crate::instance::Instance;
|
||||
use crate::sql::SqlHandler;
|
||||
@@ -36,6 +36,7 @@ use crate::sql::SqlHandler;
|
||||
pub(crate) struct MockInstance {
|
||||
instance: Instance,
|
||||
_guard: TestGuard,
|
||||
_procedure_dir: Option<TempDir>,
|
||||
}
|
||||
|
||||
impl MockInstance {
|
||||
@@ -45,12 +46,39 @@ impl MockInstance {
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
MockInstance { instance, _guard }
|
||||
MockInstance {
|
||||
instance,
|
||||
_guard,
|
||||
_procedure_dir: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn with_procedure_enabled(name: &str) -> Self {
|
||||
let (mut opts, _guard) = create_tmp_dir_and_datanode_opts(name);
|
||||
let procedure_dir = create_temp_dir(&format!("gt_procedure_{name}"));
|
||||
opts.procedure = Some(ProcedureConfig {
|
||||
store: ObjectStoreConfig::File(FileConfig {
|
||||
data_dir: procedure_dir.path().to_str().unwrap().to_string(),
|
||||
}),
|
||||
});
|
||||
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
MockInstance {
|
||||
instance,
|
||||
_guard,
|
||||
_procedure_dir: Some(procedure_dir),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn inner(&self) -> &Instance {
|
||||
&self.instance
|
||||
}
|
||||
|
||||
pub(crate) fn data_tmp_dir(&self) -> &TempDir {
|
||||
&self._guard._data_tmp_dir
|
||||
}
|
||||
}
|
||||
|
||||
struct TestGuard {
|
||||
@@ -59,8 +87,8 @@ struct TestGuard {
|
||||
}
|
||||
|
||||
fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard) {
|
||||
let wal_tmp_dir = TempDir::new(&format!("gt_wal_{name}")).unwrap();
|
||||
let data_tmp_dir = TempDir::new(&format!("gt_data_{name}")).unwrap();
|
||||
let wal_tmp_dir = create_temp_dir(&format!("gt_wal_{name}"));
|
||||
let data_tmp_dir = create_temp_dir(&format!("gt_data_{name}"));
|
||||
let opts = DatanodeOptions {
|
||||
wal: WalConfig {
|
||||
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
|
||||
|
||||
@@ -341,7 +341,7 @@ mod tests {
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
|
||||
let vector = column_schema.create_default_vector_for_padding(4);
|
||||
assert_eq!(4, vector.len());
|
||||
let expect: VectorRef = Arc::new(Int32Vector::from_slice(&[0, 0, 0, 0]));
|
||||
let expect: VectorRef = Arc::new(Int32Vector::from_slice([0, 0, 0, 0]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
|
||||
@@ -345,7 +345,7 @@ mod tests {
|
||||
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
|
||||
@@ -365,7 +365,7 @@ mod tests {
|
||||
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
|
||||
@@ -16,14 +16,14 @@ use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
use arrow::array::{Array, ArrayRef, UInt32Array};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::error::{self, Result, SerializeSnafu};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{BooleanVector, Helper, Validity, Vector, VectorRef};
|
||||
use crate::vectors::{BooleanVector, Helper, UInt32Vector, Validity, Vector, VectorRef};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ConstantVector {
|
||||
@@ -83,6 +83,35 @@ impl ConstantVector {
|
||||
self.length,
|
||||
)))
|
||||
}
|
||||
|
||||
pub(crate) fn take_vector(&self, indices: &UInt32Vector) -> Result<VectorRef> {
|
||||
if indices.is_empty() {
|
||||
return Ok(self.slice(0, 0));
|
||||
}
|
||||
ensure!(
|
||||
indices.null_count() == 0,
|
||||
error::UnsupportedOperationSnafu {
|
||||
op: "taking a null index",
|
||||
vector_type: self.vector_type_name(),
|
||||
}
|
||||
);
|
||||
|
||||
let len = self.len();
|
||||
let arr = indices.to_arrow_array();
|
||||
let indices_arr = arr.as_any().downcast_ref::<UInt32Array>().unwrap();
|
||||
if !arrow::compute::min_boolean(
|
||||
&arrow::compute::lt_scalar(indices_arr, len as u32).unwrap(),
|
||||
)
|
||||
.unwrap()
|
||||
{
|
||||
panic!("Array index out of bounds, cannot take index out of the length of the array: {len}");
|
||||
}
|
||||
|
||||
Ok(Arc::new(ConstantVector::new(
|
||||
self.inner().clone(),
|
||||
indices.len(),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for ConstantVector {
|
||||
|
||||
@@ -58,7 +58,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_date_scalar() {
|
||||
let vector = DateVector::from_slice(&[1, 2]);
|
||||
let vector = DateVector::from_slice([1, 2]);
|
||||
assert_eq!(2, vector.len());
|
||||
assert_eq!(Some(Date::new(1)), vector.get_data(0));
|
||||
assert_eq!(Some(Date::new(2)), vector.get_data(1));
|
||||
@@ -66,24 +66,24 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_date_vector_builder() {
|
||||
let input = DateVector::from_slice(&[1, 2, 3]);
|
||||
let input = DateVector::from_slice([1, 2, 3]);
|
||||
|
||||
let mut builder = DateType::default().create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::Date(Date::new(5)));
|
||||
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice(&[5, 2, 3]));
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice([5, 2, 3]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_date_from_arrow() {
|
||||
let vector = DateVector::from_slice(&[1, 2]);
|
||||
let vector = DateVector::from_slice([1, 2]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = DateVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
@@ -91,7 +91,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_date_vector() {
|
||||
let vector = DateVector::from_slice(&[-1, 0, 1]);
|
||||
let vector = DateVector::from_slice([-1, 0, 1]);
|
||||
let serialized_json = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(
|
||||
r#"["1969-12-31","1970-01-01","1970-01-02"]"#,
|
||||
|
||||
@@ -81,7 +81,7 @@ mod tests {
|
||||
assert_eq!(Value::Null, v.get(1));
|
||||
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
|
||||
|
||||
let input = DateTimeVector::from_wrapper_slice(&[
|
||||
let input = DateTimeVector::from_wrapper_slice([
|
||||
DateTime::new(1),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
@@ -92,11 +92,11 @@ mod tests {
|
||||
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice(&[
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice([
|
||||
DateTime::new(5),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
@@ -106,7 +106,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_datetime_from_arrow() {
|
||||
let vector = DateTimeVector::from_wrapper_slice(&[DateTime::new(1), DateTime::new(2)]);
|
||||
let vector = DateTimeVector::from_wrapper_slice([DateTime::new(1), DateTime::new(2)]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = DateTimeVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
|
||||
@@ -167,30 +167,30 @@ mod tests {
|
||||
Some("world"),
|
||||
])));
|
||||
|
||||
assert_vector_ref_eq(Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt8Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Int16Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt16Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt32Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Int64Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt64Vector::from_slice(&[1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Float32Vector::from_slice(&[1.0, 2.0, 3.0, 4.0])));
|
||||
assert_vector_ref_eq(Arc::new(Float64Vector::from_slice(&[1.0, 2.0, 3.0, 4.0])));
|
||||
assert_vector_ref_eq(Arc::new(Int8Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt8Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Int16Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt16Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Int32Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt32Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Int64Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(UInt64Vector::from_slice([1, 2, 3, 4])));
|
||||
assert_vector_ref_eq(Arc::new(Float32Vector::from_slice([1.0, 2.0, 3.0, 4.0])));
|
||||
assert_vector_ref_eq(Arc::new(Float64Vector::from_slice([1.0, 2.0, 3.0, 4.0])));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector_ne() {
|
||||
assert_vector_ref_ne(
|
||||
Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
|
||||
Arc::new(Int32Vector::from_slice(&[1, 2])),
|
||||
Arc::new(Int32Vector::from_slice([1, 2, 3, 4])),
|
||||
Arc::new(Int32Vector::from_slice([1, 2])),
|
||||
);
|
||||
assert_vector_ref_ne(
|
||||
Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
|
||||
Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])),
|
||||
Arc::new(Int32Vector::from_slice([1, 2, 3, 4])),
|
||||
Arc::new(Int8Vector::from_slice([1, 2, 3, 4])),
|
||||
);
|
||||
assert_vector_ref_ne(
|
||||
Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
|
||||
Arc::new(Int32Vector::from_slice([1, 2, 3, 4])),
|
||||
Arc::new(BooleanVector::from(vec![true, true])),
|
||||
);
|
||||
assert_vector_ref_ne(
|
||||
|
||||
@@ -264,7 +264,8 @@ impl Helper {
|
||||
| ArrowDataType::Dictionary(_, _)
|
||||
| ArrowDataType::Decimal128(_, _)
|
||||
| ArrowDataType::Decimal256(_, _)
|
||||
| ArrowDataType::Map(_, _) => {
|
||||
| ArrowDataType::Map(_, _)
|
||||
| ArrowDataType::RunEndEncoded(_, _) => {
|
||||
unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type())
|
||||
}
|
||||
})
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user