Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-26 16:10:02 +00:00

Compare commits: geo ... v0.1.0-alp (78 commits)
Author and date columns were not captured by the mirror; only the commit SHA1s survive:

| SHA1 |
|---|
| 2c9bcbe885 |
| dfd4b10493 |
| dd488e8d21 |
| 857054f70d |
| a41aec0a86 |
| cff8fe4e0e |
| a2f9b788f1 |
| 43f9c40f43 |
| af1df2066c |
| f34a99ff5a |
| 89a3b39728 |
| 2137587091 |
| 172c9a1e21 |
| ae147c2a74 |
| c2e1b0857c |
| 6e99bb8490 |
| eef20887cc |
| 16500b045b |
| 3d195ff858 |
| bc701d3e7f |
| 6373bb04f9 |
| bfcd74fd16 |
| fc6d73b06b |
| db2b577628 |
| cba611b9f5 |
| 6aec1b4f90 |
| 6d1dd5e7af |
| e19b63f4f5 |
| 750310c648 |
| 9fd2d4e8db |
| 77233c20e1 |
| 1fad67cf4d |
| 5abff7a536 |
| 6f1f697bfc |
| 2d4a44414d |
| ea2ebc0e87 |
| dacfd12b8f |
| 518b665f1e |
| e2c28fe374 |
| f4e22282a4 |
| 0604eb7509 |
| 81716d622e |
| 3e8d9b421c |
| 6d4c0ad5a3 |
| 00966cad69 |
| 932b30d299 |
| 7fe39e9187 |
| 2ca667cbdf |
| 64dac51e83 |
| edad6f89b5 |
| 8ab43b65ea |
| 6fc45e31e0 |
| a457c49d99 |
| b650656ae3 |
| bc9a2df9bf |
| 6b0c5281d4 |
| fad8f442ef |
| 2d52f19662 |
| d5800d0b60 |
| fbea07ea83 |
| 87130adf54 |
| c147657275 |
| d5b34f8917 |
| 4d08ee6fbb |
| 94b263c261 |
| c6d91edb83 |
| cdf3280fcf |
| f243649971 |
| 69ba4581b7 |
| f942b53ed0 |
| 25a16875b6 |
| 494a93c4f2 |
| b61d5989b7 |
| a8a6426abf |
| e99668092c |
| 0c829a9712 |
| 752be8dc41 |
| 2e1ab050a7 |
.cargo/config.toml (new file, 2 lines)
@@ -0,0 +1,2 @@
+[target.aarch64-unknown-linux-gnu]
+linker = "aarch64-linux-gnu-gcc"
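The linker override above only matters when cross-compiling for aarch64 from another host. A minimal sketch of how it gets exercised, assuming the `gcc-aarch64-linux-gnu` toolchain from the release workflow below is installed:

```bash
# Hypothetical cross-compile; .cargo/config.toml supplies the linker automatically.
rustup target add aarch64-unknown-linux-gnu
cargo build --release --target aarch64-unknown-linux-gnu
```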
.github/pr-title-checker-config.json (2 changed lines)
@@ -4,7 +4,7 @@
     "color": "B60205"
   },
   "CHECKS": {
-    "regexp": "^(feat|fix|test|refactor|chore|style|doc|perf|build|ci|revert)(\\(.*\\))?:.*",
+    "regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?:.*",
     "ignoreLabels" : ["ignore-title"]
   }
 }
.github/workflows/coverage.yml (15 changed lines)
@@ -15,9 +15,20 @@ jobs:
   grcov:
     if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
+      - name: Cache LLVM and Clang
+        id: cache-llvm
+        uses: actions/cache@v3
+        with:
+          path: ./llvm
+          key: llvm
       - uses: arduino/setup-protoc@v1
+      - uses: KyleMayes/install-llvm-action@v1
+        with:
+          version: "14.0"
+          cached: ${{ steps.cache-llvm.outputs.cache-hit }}
       - name: Install toolchain
         uses: actions-rs/toolchain@v1
         with:
@@ -29,7 +40,7 @@
       - name: Cleanup disk
         uses: curoky/cleanup-disk-action@v2.0
         with:
-          retain: 'rust'
+          retain: 'rust,llvm'
       - name: Execute tests
         uses: actions-rs/cargo@v1
         with:
@@ -38,7 +49,7 @@
         env:
           RUST_BACKTRACE: 1
           CARGO_INCREMENTAL: 0
-          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"
+          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests -Clink-arg=-fuse-ld=lld"
           GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
           GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
           GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
.github/workflows/develop.yml (31 changed lines)
@@ -1,6 +1,18 @@
 on:
   pull_request:
     types: [opened, synchronize, reopened, ready_for_review]
+  push:
+    branches:
+      - develop
+      - main
+    paths-ignore:
+      - 'docs/**'
+      - 'config/**'
+      - '.github/**'
+      - '**.md'
+      - '**.yml'
+      - '.dockerignore'
+      - 'docker/**'

 name: Continuous integration for developing

@@ -12,6 +24,7 @@ jobs:
     name: Check
     if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - uses: arduino/setup-protoc@v1
@@ -31,9 +44,20 @@
     name: Test Suite
     if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
+      - name: Cache LLVM and Clang
+        id: cache-llvm
+        uses: actions/cache@v3
+        with:
+          path: ./llvm
+          key: llvm
       - uses: arduino/setup-protoc@v1
+      - uses: KyleMayes/install-llvm-action@v1
+        with:
+          version: "14.0"
+          cached: ${{ steps.cache-llvm.outputs.cache-hit }}
       - uses: actions-rs/toolchain@v1
         with:
           profile: minimal
@@ -41,11 +65,16 @@
           override: true
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2.0.0
+      - name: Cleanup disk
+        uses: curoky/cleanup-disk-action@v2.0
+        with:
+          retain: 'rust,llvm'
       - uses: actions-rs/cargo@v1
         with:
           command: test
           args: --workspace
         env:
+          CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
           RUST_BACKTRACE: 1
           GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
           GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
@@ -56,6 +85,7 @@
     name: Rustfmt
     if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - uses: arduino/setup-protoc@v1
@@ -76,6 +106,7 @@
     name: Clippy
     if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
       - uses: arduino/setup-protoc@v1
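The Test Suite job above can be approximated locally; a sketch that mirrors the CI environment (the GT_S3_* secrets are only needed by the S3 object-store tests, and lld must be installed for the link flag to work):

```bash
# Hypothetical local equivalent of the CI test step.
export CARGO_BUILD_RUSTFLAGS="-C link-arg=-fuse-ld=lld"
export RUST_BACKTRACE=1
cargo test --workspace
```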
.github/workflows/pr-title-checker.yml (1 changed line)
@@ -11,6 +11,7 @@ on:
 jobs:
   check:
     runs-on: ubuntu-latest
+    timeout-minutes: 10
     steps:
       - uses: thehanimo/pr-title-checker@v1.3.4
         with:
.github/workflows/release.yml (new file, 188 lines)
@@ -0,0 +1,188 @@
on:
  push:
    tags:
      - "v*.*.*"

name: Release

env:
  RUST_TOOLCHAIN: nightly-2022-07-14

jobs:
  build:
    name: Build binary
    strategy:
      matrix:
        # The file format is greptime-<tag>.<os>-<arch>
        include:
          - arch: x86_64-unknown-linux-gnu
            os: ubuntu-latest
            file: greptime-${{ github.ref_name }}.linux-amd64
          - arch: aarch64-unknown-linux-gnu
            os: ubuntu-latest
            file: greptime-${{ github.ref_name }}.linux-arm64
          - arch: aarch64-apple-darwin
            os: macos-latest
            file: greptime-${{ github.ref_name }}.darwin-arm64
          - arch: x86_64-apple-darwin
            os: macos-latest
            file: greptime-${{ github.ref_name }}.darwin-amd64
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3

      - name: Cache cargo assets
        id: cache
        uses: actions/cache@v3
        with:
          path: |
            ./llvm
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ matrix.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Install Protoc for linux
        if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
        run: | # Make sure the protoc is >= 3.15
          wget https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip
          unzip protoc-21.9-linux-x86_64.zip -d protoc
          sudo cp protoc/bin/protoc /usr/local/bin/
          sudo cp -r protoc/include/google /usr/local/include/

      - uses: KyleMayes/install-llvm-action@v1
        with:
          version: "14.0"
          cached: ${{ steps.cache.outputs.cache-hit }}

      - name: Install Protoc for macos
        if: contains(matrix.arch, 'darwin')
        run: |
          brew install protobuf

      - name: Install dependencies for linux
        if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
        run: |
          sudo apt-get -y update
          sudo apt-get -y install libssl-dev pkg-config g++-aarch64-linux-gnu gcc-aarch64-linux-gnu

      - name: Install stable toolchain
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: ${{ env.RUST_TOOLCHAIN }}
          override: true
          target: ${{ matrix.arch }}

      - name: Output package versions
        run: protoc --version ; cargo version ; rustc --version ; gcc --version ; g++ --version

      - name: Run cargo build
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: ${{ matrix.opts }} --release --locked --target ${{ matrix.arch }}
        env:
          CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"

      - name: Calculate checksum and rename binary
        shell: bash
        run: |
          cd target/${{ matrix.arch }}/release
          cp greptime ${{ matrix.file }}
          echo $(shasum -a 256 greptime | cut -f1 -d' ') > ${{ matrix.file }}.sha256sum

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: ${{ matrix.file }}
          path: target/${{ matrix.arch }}/release/${{ matrix.file }}

      - name: Upload checksum of artifacts
        uses: actions/upload-artifact@v3
        with:
          name: ${{ matrix.file }}.sha256sum
          path: target/${{ matrix.arch }}/release/${{ matrix.file }}.sha256sum

  release:
    name: Release artifacts
    needs: [build]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout sources
        uses: actions/checkout@v3

      - name: Download artifacts
        uses: actions/download-artifact@v3

      - name: Publish release
        uses: softprops/action-gh-release@v1
        with:
          name: "Release ${{ github.ref_name }}"
          files: |
            **/greptime-${{ github.ref_name }}.*

  docker:
    name: Build docker image
    needs: [build]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout sources
        uses: actions/checkout@v2

      - name: Download amd64 binary
        uses: actions/download-artifact@v3
        with:
          name: greptime-${{ github.ref_name }}.linux-amd64
          path: amd64

      - name: Rename amd64 binary
        run: |
          mv amd64/greptime-${{ github.ref_name }}.linux-amd64 amd64/greptime

      - name: Download arm64 binary
        uses: actions/download-artifact@v3
        with:
          name: greptime-${{ github.ref_name }}.linux-arm64
          path: arm64

      - name: Rename arm64 binary
        run: |
          mv arm64/greptime-${{ github.ref_name }}.linux-arm64 arm64/greptime

      - name: Set file permissions
        shell: bash
        run: |
          chmod +x amd64/greptime arm64/greptime

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Login to Dockerhub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up buildx
        uses: docker/setup-buildx-action@v2

      - name: Build and push
        uses: docker/build-push-action@v3
        with:
          context: .
          file: ./docker/ci/Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
          tags: |
            ghcr.io/greptimeteam/greptimedb:${{ github.ref_name }}
            greptime/greptimedb:${{ github.ref_name }}
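The checksum step stores the SHA-256 of the `greptime` binary in a sidecar `.sha256sum` file, and the renamed artifact is byte-identical to the hashed binary, so a downloaded release can be checked by hand. A sketch, with `<tag>` standing in for the release tag:

```bash
# Hypothetical verification of a linux-amd64 artifact: the two digests should match.
shasum -a 256 greptime-<tag>.linux-amd64
cat greptime-<tag>.linux-amd64.sha256sum
```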
.gitignore (9 changed lines)
@@ -19,12 +19,15 @@ debug/
# JetBrains IDE config directory
.idea/

# VSCode IDE config directory
.vscode/

# Logs
**/__unittest_logs
logs/

.DS_store
.gitignore

# cpython's generated python byte code
**/__pycache__/

# Benchmark dataset
benchmarks/data
[file name not captured by the mirror; from the content this is the pre-commit hook config, likely .pre-commit-config.yaml]
@@ -9,7 +9,7 @@ repos:
     rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
     hooks:
       - id: cargo-sort
-        args: ["--workspace"]
+        args: ["--workspace", "--print"]

   - repo: https://github.com/doublify/pre-commit-rust
     rev: v1.0
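The same cargo-sort check that the hook runs can be invoked by hand with the arguments from the config above (assuming the cargo-sort subcommand is installed):

```bash
# Hypothetical manual run of the hook's check.
cargo install cargo-sort
cargo sort --workspace --print
```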
CODE_OF_CONDUCT.md (new file, 132 lines)
@@ -0,0 +1,132 @@
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.

Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at info@greptime.com. All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of actions.

**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.1, available at [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at [https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
[file name not captured by the mirror; the content matches CONTRIBUTING.md, which the README links to below]
@@ -10,6 +10,34 @@ To learn about the design of GreptimeDB, please refer to the [design docs](https
- Make sure all unit tests are passed.
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).

#### `pre-commit` Hooks

You can set up the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.

1. Install `pre-commit`

   ```
   $ pip install pre-commit
   ```
   or
   ```
   $ brew install pre-commit
   ```

2. Install the `pre-commit` hooks

   ```
   $ pre-commit install
   pre-commit installed at .git/hooks/pre-commit

   $ pre-commit install --hook-type commit-msg
   pre-commit installed at .git/hooks/commit-msg

   $ pre-commit install --hook-type pre-push
   pre-commit installed at .git/hooks/pre-push
   ```

   Now `pre-commit` will run automatically on `git commit`.

### Title

The titles of pull requests should be prefixed with a category name listed in the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0)

@@ -32,4 +60,4 @@ of what you were trying to do and what went wrong. You can also reach for help i

## Bug report
-To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
+To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
Cargo.lock (generated, 1845 changed lines): diff suppressed because it is too large.
Cargo.toml (11 changed lines)
@@ -1,10 +1,12 @@
 [workspace]
 members = [
     "benchmarks",
     "src/api",
     "src/catalog",
     "src/client",
     "src/cmd",
     "src/common/base",
     "src/common/catalog",
     "src/common/error",
     "src/common/function",
     "src/common/function-macro",
@@ -12,13 +14,15 @@ members = [
     "src/common/query",
     "src/common/recordbatch",
     "src/common/runtime",
     "src/common/substrait",
     "src/common/telemetry",
     "src/common/time",
     "src/datanode",
     "src/datatypes",
     "src/frontend",
     "src/log-store",
     "src/logical-plans",
     "src/meta-client",
     "src/meta-srv",
     "src/object-store",
     "src/query",
     "src/script",
@@ -28,8 +32,7 @@ members = [
     "src/store-api",
     "src/table",
     "src/table-engine",
     "test-util",
 ]

 [patch.crates-io]
 sqlparser = { git = "https://github.com/sunng87/sqlparser-rs.git", branch = "feature/argument-for-custom-type-for-v015" }

 [profile.release]
 debug = true
README.md (51 changed lines)
@@ -11,7 +11,6 @@ GreptimeDB: the next-generation hybrid timeseries/analytics processing database
 To compile GreptimeDB from source, you'll need the following:
 - Rust
 - Protobuf
-- OpenSSL

 #### Rust

@@ -23,23 +22,6 @@ The easiest way to install Rust is to use [`rustup`](https://rustup.rs/), which
 major package manager on macOS and Linux distributions. You can find
 installation instructions [here](https://grpc.io/docs/protoc-installation/).

-#### OpenSSL
-
-For Ubuntu:
-```bash
-sudo apt install libssl-dev
-```
-
-For RedHat-based: Fedora, Oracle Linux, etc:
-```bash
-sudo dnf install openssl-devel
-```
-
-For macOS:
-```bash
-brew install openssl
-```
-
 ### Build the Docker Image

 ```
@@ -125,7 +107,7 @@ cargo run -- --log-dir=logs --log-level=debug frontend start -c ./config/fronten
       cpu DOUBLE DEFAULT 0,
       memory DOUBLE,
       TIME INDEX (ts),
-      PRIMARY KEY(ts,host)) ENGINE=mito WITH(regions=1);
+      PRIMARY KEY(host)) ENGINE=mito WITH(regions=1);
   ```

 3. Insert data:
@@ -151,33 +133,6 @@
 ```
 You can delete your data by removing `/tmp/greptimedb`.

-## Contribute
+## Contributing

-1. [Install rust](https://www.rust-lang.org/tools/install)
-2. [Install `pre-commit`](https://pre-commit.com/#plugins) for run hooks on every commit automatically such as `cargo fmt` etc.
-
-   ```
-   $ pip install pre-commit
-
-   or
-
-   $ brew install pre-commit
-   $
-   ```
-
-3. Install the git hook scripts:
-
-   ```
-   $ pre-commit install
-   pre-commit installed at .git/hooks/pre-commit
-
-   $ pre-commit install --hook-type commit-msg
-   pre-commit installed at .git/hooks/commit-msg
-
-   $ pre-commit install --hook-type pre-push
-   pre-commit installed at .git/hooks/pre-pus
-   ```
-
-   now `pre-commit` will run automatically on `git commit`.
-
-4. Check out branch from `develop` and make your contribution. Follow the [style guide](https://github.com/GreptimeTeam/docs/blob/main/style-guide/zh.md). Create a PR when you are ready, feel free and have fun!
+Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
benchmarks/Cargo.toml (new file, 14 lines)
@@ -0,0 +1,14 @@
[package]
name = "benchmarks"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = "10"
clap = { version = "4.0", features = ["derive"] }
client = { path = "../src/client" }
indicatif = "0.17.1"
itertools = "0.10.5"
parquet = { version = "*" }
tokio = { version = "1.21", features = ["full"] }
benchmarks/src/bin/nyc-taxi.rs (new file, 439 lines)
@@ -0,0 +1,439 @@
//! Use the taxi trip records from the New York City dataset to benchmark. You can download the
//! dataset from [here](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).

#![feature(once_cell)]
#![allow(clippy::print_stdout)]

use std::{
    collections::HashMap,
    path::{Path, PathBuf},
    sync::Arc,
    time::Instant,
};

use arrow::{
    array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray},
    datatypes::{DataType, Float64Type, Int64Type},
    record_batch::RecordBatch,
};
use clap::Parser;
use client::{
    admin::Admin,
    api::v1::{
        codec::InsertBatch, column::Values, insert_expr, Column, ColumnDataType, ColumnDef,
        CreateExpr, InsertExpr,
    },
    Client, Database, Select,
};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::{
    arrow::{ArrowReader, ParquetFileArrowReader},
    file::{reader::FileReader, serialized_reader::SerializedFileReader},
};
use tokio::task::JoinSet;

const DATABASE_NAME: &str = "greptime";
const CATALOG_NAME: &str = "greptime";
const SCHEMA_NAME: &str = "public";
const TABLE_NAME: &str = "nyc_taxi";

#[derive(Parser)]
#[command(name = "NYC benchmark runner")]
struct Args {
    /// Path to the dataset
    #[arg(short, long)]
    path: Option<String>,

    /// Batch size of insert request.
    #[arg(short = 's', long = "batch-size", default_value_t = 4096)]
    batch_size: usize,

    /// Number of client threads on write (parallel on file level)
    #[arg(short = 't', long = "thread-num", default_value_t = 4)]
    thread_num: usize,

    /// Number of query iteration
    #[arg(short = 'i', long = "iter-num", default_value_t = 3)]
    iter_num: usize,

    #[arg(long = "skip-write")]
    skip_write: bool,

    #[arg(long = "skip-read")]
    skip_read: bool,

    #[arg(short, long, default_value_t = String::from("127.0.0.1:3001"))]
    endpoint: String,
}

fn get_file_list<P: AsRef<Path>>(path: P) -> Vec<PathBuf> {
    std::fs::read_dir(path)
        .unwrap()
        .map(|dir| dir.unwrap().path().canonicalize().unwrap())
        .collect()
}

async fn write_data(
    batch_size: usize,
    db: &Database,
    path: PathBuf,
    mpb: MultiProgress,
    pb_style: ProgressStyle,
) -> u128 {
    let file = std::fs::File::open(&path).unwrap();
    let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
    let row_num = file_reader.metadata().file_metadata().num_rows();
    let record_batch_reader = ParquetFileArrowReader::new(file_reader)
        .get_record_reader(batch_size)
        .unwrap();
    let progress_bar = mpb.add(ProgressBar::new(row_num as _));
    progress_bar.set_style(pb_style);
    progress_bar.set_message(format!("{:?}", path));

    let mut total_rpc_elapsed_ms = 0;

    for record_batch in record_batch_reader {
        let record_batch = record_batch.unwrap();
        let row_count = record_batch.num_rows();
        let insert_batch = convert_record_batch(record_batch).into();
        let insert_expr = InsertExpr {
            table_name: TABLE_NAME.to_string(),
            expr: Some(insert_expr::Expr::Values(insert_expr::Values {
                values: vec![insert_batch],
            })),
            options: HashMap::default(),
        };
        let now = Instant::now();
        db.insert(insert_expr).await.unwrap();
        let elapsed = now.elapsed();
        total_rpc_elapsed_ms += elapsed.as_millis();
        progress_bar.inc(row_count as _);
    }

    progress_bar.finish_with_message(format!(
        "file {:?} done in {}ms",
        path, total_rpc_elapsed_ms
    ));
    total_rpc_elapsed_ms
}

fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
    let schema = record_batch.schema();
    let fields = schema.fields();
    let row_count = record_batch.num_rows();
    let mut columns = vec![];

    for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
        let values = build_values(array);
        let column = Column {
            column_name: field.name().to_owned(),
            values: Some(values),
            null_mask: vec![],
            // datatype and semantic_type are set to default
            ..Default::default()
        };
        columns.push(column);
    }

    InsertBatch {
        columns,
        row_count: row_count as _,
    }
}

fn build_values(column: &ArrayRef) -> Values {
    match column.data_type() {
        DataType::Int64 => {
            let array = column
                .as_any()
                .downcast_ref::<PrimitiveArray<Int64Type>>()
                .unwrap();
            let values = array.values();
            Values {
                i64_values: values.to_vec(),
                ..Default::default()
            }
        }
        DataType::Float64 => {
            let array = column
                .as_any()
                .downcast_ref::<PrimitiveArray<Float64Type>>()
                .unwrap();
            let values = array.values();
            Values {
                f64_values: values.to_vec(),
                ..Default::default()
            }
        }
        DataType::Timestamp(_, _) => {
            let array = column
                .as_any()
                .downcast_ref::<TimestampNanosecondArray>()
                .unwrap();
            let values = array.values();
            Values {
                i64_values: values.to_vec(),
                ..Default::default()
            }
        }
        DataType::Utf8 => {
            let array = column.as_any().downcast_ref::<StringArray>().unwrap();
            let values = array.iter().filter_map(|s| s.map(String::from)).collect();
            Values {
                string_values: values,
                ..Default::default()
            }
        }
        DataType::Null
        | DataType::Boolean
        | DataType::Int8
        | DataType::Int16
        | DataType::Int32
        | DataType::UInt8
        | DataType::UInt16
        | DataType::UInt32
        | DataType::UInt64
        | DataType::Float16
        | DataType::Float32
        | DataType::Date32
        | DataType::Date64
        | DataType::Time32(_)
        | DataType::Time64(_)
        | DataType::Duration(_)
        | DataType::Interval(_)
        | DataType::Binary
        | DataType::FixedSizeBinary(_)
        | DataType::LargeBinary
        | DataType::LargeUtf8
        | DataType::List(_)
        | DataType::FixedSizeList(_, _)
        | DataType::LargeList(_)
        | DataType::Struct(_)
        | DataType::Union(_, _)
        | DataType::Dictionary(_, _)
        | DataType::Decimal(_, _)
        | DataType::Map(_, _) => todo!(),
    }
}

fn create_table_expr() -> CreateExpr {
    CreateExpr {
        catalog_name: Some(CATALOG_NAME.to_string()),
        schema_name: Some(SCHEMA_NAME.to_string()),
        table_name: TABLE_NAME.to_string(),
        desc: None,
        column_defs: vec![
            ColumnDef {
                name: "VendorID".to_string(),
                datatype: ColumnDataType::Int64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "tpep_pickup_datetime".to_string(),
                datatype: ColumnDataType::Int64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "tpep_dropoff_datetime".to_string(),
                datatype: ColumnDataType::Int64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "passenger_count".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "trip_distance".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "RatecodeID".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "store_and_fwd_flag".to_string(),
                datatype: ColumnDataType::String as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "PULocationID".to_string(),
                datatype: ColumnDataType::Int64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "DOLocationID".to_string(),
                datatype: ColumnDataType::Int64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "payment_type".to_string(),
                datatype: ColumnDataType::Int64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "fare_amount".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "extra".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "mta_tax".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "tip_amount".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "tolls_amount".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "improvement_surcharge".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "total_amount".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "congestion_surcharge".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
            ColumnDef {
                name: "airport_fee".to_string(),
                datatype: ColumnDataType::Float64 as i32,
                is_nullable: true,
                default_constraint: None,
            },
        ],
        time_index: "tpep_pickup_datetime".to_string(),
        primary_keys: vec!["VendorID".to_string()],
        create_if_not_exists: false,
        table_options: Default::default(),
    }
}

fn query_set() -> HashMap<String, String> {
    let mut ret = HashMap::new();

    ret.insert(
        "count_all".to_string(),
        format!("SELECT COUNT(*) FROM {};", TABLE_NAME),
    );

    ret.insert(
        "fare_amt_by_passenger".to_string(),
        format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {} GROUP BY passenger_count",TABLE_NAME)
    );

    ret
}

async fn do_write(args: &Args, client: &Client) {
    let admin = Admin::new("admin", client.clone());

    let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument"));
    let mut write_jobs = JoinSet::new();

    let create_table_result = admin.create(create_table_expr()).await;
    println!("Create table result: {:?}", create_table_result);

    let progress_bar_style = ProgressStyle::with_template(
        "[{elapsed_precise}] {bar:60.cyan/blue} {pos:>7}/{len:7} {msg}",
    )
    .unwrap()
    .progress_chars("##-");
    let multi_progress_bar = MultiProgress::new();
    let file_progress = multi_progress_bar.add(ProgressBar::new(file_list.len() as _));
    file_progress.inc(0);

    let batch_size = args.batch_size;
    for _ in 0..args.thread_num {
        if let Some(path) = file_list.pop() {
            let db = Database::new(DATABASE_NAME, client.clone());
            let mpb = multi_progress_bar.clone();
            let pb_style = progress_bar_style.clone();
            write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
        }
    }
    while write_jobs.join_next().await.is_some() {
        file_progress.inc(1);
        if let Some(path) = file_list.pop() {
            let db = Database::new(DATABASE_NAME, client.clone());
            let mpb = multi_progress_bar.clone();
            let pb_style = progress_bar_style.clone();
            write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
        }
    }
}

async fn do_query(num_iter: usize, db: &Database) {
    for (query_name, query) in query_set() {
        println!("Running query: {}", query);
        for i in 0..num_iter {
            let now = Instant::now();
            let _res = db.select(Select::Sql(query.clone())).await.unwrap();
            let elapsed = now.elapsed();
            println!(
                "query {}, iteration {}: {}ms",
                query_name,
                i,
                elapsed.as_millis()
            );
        }
    }
}

fn main() {
    let args = Args::parse();

    tokio::runtime::Builder::new_multi_thread()
        .worker_threads(args.thread_num)
        .enable_all()
        .build()
        .unwrap()
        .block_on(async {
            let client = Client::with_urls(vec![&args.endpoint]);

            if !args.skip_write {
                do_write(&args, &client).await;
            }

            if !args.skip_read {
                let db = Database::new(DATABASE_NAME, client.clone());
                do_query(args.iter_num, &db).await;
            }
        })
}
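A sketch of invoking the benchmark, using the flag names defined in the `Args` struct above (the data directory and endpoint are assumptions; any directory of downloaded trip-record parquet files works):

```bash
# Hypothetical run against a locally started server on the default gRPC endpoint.
cargo run --release --bin nyc-taxi -- \
    --path ./benchmarks/data \
    --batch-size 4096 \
    --thread-num 4 \
    --iter-num 3 \
    --endpoint 127.0.0.1:3001
```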
[file name not captured by the mirror; the content matches the datanode example config, likely config/datanode.example.toml]
@@ -1,7 +1,9 @@
 node_id = 42
 http_addr = '0.0.0.0:3000'
 rpc_addr = '0.0.0.0:3001'
 wal_dir = '/tmp/greptimedb/wal'

 rpc_runtime_size = 8
 mode = "standalone"
 mysql_addr = '0.0.0.0:3306'
 mysql_runtime_size = 4

@@ -12,3 +14,9 @@ postgres_runtime_size = 4
 [storage]
 type = 'File'
 data_dir = '/tmp/greptimedb/data/'
+
+[meta_client_opts]
+metasrv_addr = "1.1.1.1:3002"
+timeout_millis = 3000
+connect_timeout_millis = 5000
+tcp_nodelay = true
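A sketch of feeding this file to a datanode; the `datanode start` subcommand comes from the old Dockerfile CMD below, and the `-c` flag mirrors the `frontend start -c` invocation shown in the README diff above:

```bash
# Hypothetical: start a datanode with the example config.
cargo run -- datanode start -c ./config/datanode.example.toml
```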
config/metasrv.example.toml (new file, 4 lines)
@@ -0,0 +1,4 @@
+bind_addr = '127.0.0.1:3002'
+server_addr = '0.0.0.0:3002'
+store_addr = '127.0.0.1:2380'
+datanode_lease_secs = 30
[file name not captured by the mirror; the content matches the main docker build file, likely docker/Dockerfile]
@@ -24,9 +24,8 @@ RUN cargo build --release
 # TODO(zyy17): Maybe should use the more secure container image.
 FROM ubuntu:22.04 as base

-WORKDIR /greptimedb
-COPY --from=builder /greptimedb/target/release/greptime /greptimedb/bin/
-ENV PATH /greptimedb/bin/:$PATH
+WORKDIR /greptime
+COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
+ENV PATH /greptime/bin/:$PATH

-ENTRYPOINT [ "greptime" ]
-CMD [ "datanode", "start"]
+ENTRYPOINT ["greptime"]
docker/ci/Dockerfile (new file, 9 lines)
@@ -0,0 +1,9 @@
+FROM ubuntu:22.04
+
+ARG TARGETARCH
+
+ADD $TARGETARCH/greptime /greptime/bin/
+
+ENV PATH /greptime/bin/:$PATH
+
+ENTRYPOINT ["greptime"]
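This image only copies in a prebuilt `greptime` binary per `TARGETARCH`, which is why the release workflow lays the binaries out in `amd64/` and `arm64/` directories before building. A sketch of the equivalent local multi-arch build (assumes buildx is configured; without `--push` the result stays in the build cache):

```bash
# Hypothetical local counterpart of the CI build-and-push step.
mkdir -p amd64 arm64   # each must already contain a 'greptime' binary for its arch
docker buildx build --platform linux/amd64,linux/arm64 -f docker/ci/Dockerfile .
```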
[file name not captured by the mirror; the dependencies match src/api/Cargo.toml]
@@ -5,6 +5,8 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
+common-base = { path = "../common/base" }
+common-time = { path = "../common/time" }
 datatypes = { path = "../datatypes" }
 prost = "0.11"
 snafu = { version = "0.7", features = ["backtraces"] }
[file name not captured by the mirror; the content matches the proto build script, likely src/api/build.rs]
@@ -6,6 +6,11 @@ fn main() {
             "greptime/v1/select.proto",
             "greptime/v1/physical_plan.proto",
             "greptime/v1/greptime.proto",
+            "greptime/v1/meta/common.proto",
+            "greptime/v1/meta/heartbeat.proto",
+            "greptime/v1/meta/route.proto",
+            "greptime/v1/meta/store.proto",
+            "prometheus/remote/remote.proto",
         ],
         &["."],
     )
[file name not captured by the mirror; the message lives in the greptime.v1 API protos]
@@ -49,7 +49,7 @@ message Column {
   bytes null_mask = 4;

   // Helpful in creating vector from column.
-  optional ColumnDataType datatype = 5;
+  ColumnDataType datatype = 5;
 }

 message ColumnDef {
[file name not captured by the mirror; the content matches the greptime.v1 common proto]
@@ -2,6 +2,10 @@ syntax = "proto3";

 package greptime.v1;

+message RequestHeader {
+  string tenant = 1;
+}
+
 message ExprHeader {
   uint32 version = 1;
 }
[file name not captured by the mirror; the content matches the greptime.v1 database proto]
@@ -27,6 +27,7 @@ message ObjectExpr {
 message SelectExpr {
   oneof expr {
     string sql = 1;
     bytes logical_plan = 2;
+    PhysicalPlan physical_plan = 15;
   }
 }
@@ -55,6 +56,8 @@
     // The "sql" field is meant to be removed in the future.
     string sql = 3;
   }
+
+  map<string, bytes> options = 4;
 }

 // TODO(jiachun)
[file name not captured by the mirror; the content matches greptime/v1/greptime.proto]
@@ -3,6 +3,7 @@ syntax = "proto3";
 package greptime.v1;

 import "greptime/v1/admin.proto";
+import "greptime/v1/common.proto";
 import "greptime/v1/database.proto";

 service Greptime {
@@ -10,8 +11,9 @@ service Greptime {
 }

 message BatchRequest {
-  repeated AdminRequest admins = 1;
-  repeated DatabaseRequest databases = 2;
+  RequestHeader header = 1;
+  repeated AdminRequest admins = 2;
+  repeated DatabaseRequest databases = 3;
 }

 message BatchResponse {
[file name not captured by the mirror; InsertBatch lives in the greptime.v1 codec protos]
@@ -8,3 +8,7 @@ message InsertBatch {
   repeated Column columns = 1;
   uint32 row_count = 2;
 }
+
+message RegionId {
+  uint64 id = 1;
+}
src/api/greptime/v1/meta/common.proto (new file, 48 lines)
@@ -0,0 +1,48 @@
syntax = "proto3";

package greptime.v1.meta;

message RequestHeader {
  uint64 protocol_version = 1;
  // cluster_id is the ID of the cluster which the request is sent to.
  uint64 cluster_id = 2;
  // member_id is the ID of the sender server.
  uint64 member_id = 3;
}

message ResponseHeader {
  uint64 protocol_version = 1;
  // cluster_id is the ID of the cluster which sent the response.
  uint64 cluster_id = 2;
  Error error = 3;
}

message Error {
  int32 code = 1;
  string err_msg = 2;
}

message Peer {
  uint64 id = 1;
  string addr = 2;
}

message TableName {
  string catalog_name = 1;
  string schema_name = 2;
  string table_name = 3;
}

message TimeInterval {
  // The unix timestamp in millis of the start of this period.
  uint64 start_timestamp_millis = 1;
  // The unix timestamp in millis of the end of this period.
  uint64 end_timestamp_millis = 2;
}

message KeyValue {
  // key is the key in bytes. An empty key is not allowed.
  bytes key = 1;
  // value is the value held by the key, in bytes.
  bytes value = 2;
}
src/api/greptime/v1/meta/heartbeat.proto (new file, 92 lines)
@@ -0,0 +1,92 @@
syntax = "proto3";

package greptime.v1.meta;

import "greptime/v1/meta/common.proto";

service Heartbeat {
  // Heartbeat, there may be many contents of the heartbeat, such as:
  // 1. Metadata to be registered to meta server and discoverable by other nodes.
  // 2. Some performance metrics, such as Load, CPU usage, etc.
  // 3. The number of computing tasks being executed.
  rpc Heartbeat(stream HeartbeatRequest) returns (stream HeartbeatResponse) {}

  // Ask leader's endpoint.
  rpc AskLeader(AskLeaderRequest) returns (AskLeaderResponse) {}
}

message HeartbeatRequest {
  RequestHeader header = 1;

  // Self peer
  Peer peer = 2;
  // Leader node
  bool is_leader = 3;
  // Actually reported time interval
  TimeInterval report_interval = 4;
  // Node stat
  NodeStat node_stat = 5;
  // Region stats in this node
  repeated RegionStat region_stats = 6;
  // Follower nodes and stats, empty on follower nodes
  repeated ReplicaStat replica_stats = 7;
}

message NodeStat {
  // The read capacity units during this period
  uint64 rcus = 1;
  // The write capacity units during this period
  uint64 wcus = 2;
  // Table number in this node
  uint64 table_num = 3;
  // Region number in this node
  uint64 region_num = 4;

  double cpu_usage = 5;
  double load = 6;
  // Read disk I/O in the node
  double read_io_rate = 7;
  // Write disk I/O in the node
  double write_io_rate = 8;

  // Others
  map<string, string> attrs = 100;
}

message RegionStat {
  uint64 region_id = 1;
  TableName table_name = 2;
  // The read capacity units during this period
  uint64 rcus = 3;
  // The write capacity units during this period
  uint64 wcus = 4;
  // Approximate region size
  uint64 approximate_size = 5;
  // Approximate number of rows
  uint64 approximate_rows = 6;

  // Others
  map<string, string> attrs = 100;
}

message ReplicaStat {
  Peer peer = 1;
  bool in_sync = 2;
  bool is_learner = 3;
}

message HeartbeatResponse {
  ResponseHeader header = 1;

  repeated bytes payload = 2;
}

message AskLeaderRequest {
  RequestHeader header = 1;
}

message AskLeaderResponse {
  ResponseHeader header = 1;

  Peer leader = 2;
}
src/api/greptime/v1/meta/route.proto (new file, 82 lines)
@@ -0,0 +1,82 @@
syntax = "proto3";

package greptime.v1.meta;

import "greptime/v1/meta/common.proto";

service Router {
  // Fetch routing information for tables. The smallest unit is the complete
  // routing information(all regions) of a table.
  //
  // ```text
  // table_1
  //   table_name
  //   table_schema
  //   regions
  //     region_1
  //       leader_peer
  //       follower_peer_1, follower_peer_2
  //     region_2
  //       leader_peer
  //       follower_peer_1, follower_peer_2, follower_peer_3
  //     region_xxx
  // table_2
  // ...
  // ```
  //
  rpc Route(RouteRequest) returns (RouteResponse) {}

  rpc Create(CreateRequest) returns (RouteResponse) {}
}

message RouteRequest {
  RequestHeader header = 1;

  repeated TableName table_names = 2;
}

message RouteResponse {
  ResponseHeader header = 1;

  repeated Peer peers = 2;
  repeated TableRoute table_routes = 3;
}

message CreateRequest {
  RequestHeader header = 1;

  TableName table_name = 2;
  repeated Partition partitions = 3;
}

message TableRoute {
  Table table = 1;
  repeated RegionRoute region_routes = 2;
}

message RegionRoute {
  Region region = 1;
  // single leader node for write task
  uint64 leader_peer_index = 2;
  // multiple follower nodes for read task
  repeated uint64 follower_peer_indexes = 3;
}

message Table {
  TableName table_name = 1;
  bytes table_schema = 2;
}

message Region {
  uint64 id = 1;
  string name = 2;
  Partition partition = 3;

  map<string, string> attrs = 100;
}

// PARTITION `region_name` VALUES LESS THAN (value_list)
message Partition {
  repeated bytes column_list = 1;
  repeated bytes value_list = 2;
}
src/api/greptime/v1/meta/store.proto (new file, 138 lines)
@@ -0,0 +1,138 @@
syntax = "proto3";

package greptime.v1.meta;

import "greptime/v1/meta/common.proto";

service Store {
  // Range gets the keys in the range from the key-value store.
  rpc Range(RangeRequest) returns (RangeResponse);

  // Put puts the given key into the key-value store.
  rpc Put(PutRequest) returns (PutResponse);

  // BatchPut atomically puts the given keys into the key-value store.
  rpc BatchPut(BatchPutRequest) returns (BatchPutResponse);

  // CompareAndPut atomically puts the value to the given updated
  // value if the current value == the expected value.
  rpc CompareAndPut(CompareAndPutRequest) returns (CompareAndPutResponse);

  // DeleteRange deletes the given range from the key-value store.
  rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
}

message RangeRequest {
  RequestHeader header = 1;

  // key is the first key for the range. If range_end is not given, the
  // request only looks up key.
  bytes key = 2;
  // range_end is the upper bound on the requested range [key, range_end).
  // If range_end is '\0', the range is all keys >= key.
  // If range_end is key plus one (e.g., "aa"+1 == "ab", "a\xff"+1 == "b"),
  // then the range request gets all keys prefixed with key.
  // If both key and range_end are '\0', then the range request returns all
  // keys.
  bytes range_end = 3;
  // limit is a limit on the number of keys returned for the request. When
  // limit is set to 0, it is treated as no limit.
  int64 limit = 4;
  // keys_only when set returns only the keys and not the values.
  bool keys_only = 5;
}

message RangeResponse {
  ResponseHeader header = 1;

  // kvs is the list of key-value pairs matched by the range request.
  repeated KeyValue kvs = 2;
  // more indicates if there are more keys to return in the requested range.
  bool more = 3;
}

message PutRequest {
  RequestHeader header = 1;

  // key is the key, in bytes, to put into the key-value store.
  bytes key = 2;
  // value is the value, in bytes, to associate with the key in the
  // key-value store.
  bytes value = 3;
  // If prev_kv is set, gets the previous key-value pair before changing it.
  // The previous key-value pair will be returned in the put response.
  bool prev_kv = 4;
}

message PutResponse {
  ResponseHeader header = 1;

  // If prev_kv is set in the request, the previous key-value pair will be
  // returned.
  KeyValue prev_kv = 2;
}

message BatchPutRequest {
  RequestHeader header = 1;

  repeated KeyValue kvs = 2;
  // If prev_kv is set, gets the previous key-value pairs before changing it.
  // The previous key-value pairs will be returned in the batch put response.
  bool prev_kv = 3;
}

message BatchPutResponse {
  ResponseHeader header = 1;

  // If prev_kv is set in the request, the previous key-value pairs will be
  // returned.
  repeated KeyValue prev_kvs = 2;
}

message CompareAndPutRequest {
  RequestHeader header = 1;

  // key is the key, in bytes, to put into the key-value store.
  bytes key = 2;
  // expect is the previous value, in bytes
  bytes expect = 3;
  // value is the value, in bytes, to associate with the key in the
  // key-value store.
  bytes value = 4;
}

message CompareAndPutResponse {
  ResponseHeader header = 1;

  bool success = 2;
  KeyValue prev_kv = 3;
}

message DeleteRangeRequest {
  RequestHeader header = 1;

  // key is the first key to delete in the range.
  bytes key = 2;
  // range_end is the key following the last key to delete for the range
  // [key, range_end).
  // If range_end is not given, the range is defined to contain only the key
  // argument.
  // If range_end is one bit larger than the given key, then the range is all
  // the keys with the prefix (the given key).
  // If range_end is '\0', the range is all keys greater than or equal to the
  // key argument.
  bytes range_end = 3;
  // If prev_kv is set, gets the previous key-value pairs before deleting it.
  // The previous key-value pairs will be returned in the delete response.
  bool prev_kv = 4;
}

message DeleteRangeResponse {
  ResponseHeader header = 1;

  // deleted is the number of keys deleted by the delete range request.
  int64 deleted = 2;
  // If prev_kv is set in the request, the previous key-value pairs will be
  // returned.
  repeated KeyValue prev_kvs = 3;
}
85
src/api/prometheus/remote/remote.proto
Normal file
85
src/api/prometheus/remote/remote.proto
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright 2016 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";
package prometheus;

option go_package = "prompb";

import "prometheus/remote/types.proto";

message WriteRequest {
  repeated prometheus.TimeSeries timeseries = 1;
  // Cortex uses this field to determine the source of the write request.
  // We reserve it to avoid any compatibility issues.
  reserved 2;
  repeated prometheus.MetricMetadata metadata = 3;
}

// ReadRequest represents a remote read request.
message ReadRequest {
  repeated Query queries = 1;

  enum ResponseType {
    // Server will return a single ReadResponse message with matched series that includes a list of raw samples.
    // It's recommended to use streamed response types instead.
    //
    // Response headers:
    // Content-Type: "application/x-protobuf"
    // Content-Encoding: "snappy"
    SAMPLES = 0;
    // Server will stream delimited ChunkedReadResponse messages that contain XOR encoded chunks for a single series.
    // Each message is preceded by a varint size and a fixed-size big-endian uint32 CRC32 Castagnoli checksum.
    //
    // Response headers:
    // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"
    // Content-Encoding: ""
    STREAMED_XOR_CHUNKS = 1;
  }

  // accepted_response_types allows negotiating the content type of the response.
  //
  // Response types are taken from the list in FIFO order. If no response type in `accepted_response_types` is
  // implemented by the server, an error is returned.
  // For requests that do not contain the `accepted_response_types` field, the SAMPLES response type will be used.
  repeated ResponseType accepted_response_types = 2;
}

// ReadResponse is a response when response_type equals SAMPLES.
message ReadResponse {
  // In same order as the request's queries.
  repeated QueryResult results = 1;
}

message Query {
  int64 start_timestamp_ms = 1;
  int64 end_timestamp_ms = 2;
  repeated prometheus.LabelMatcher matchers = 3;
  prometheus.ReadHints hints = 4;
}

message QueryResult {
  // Samples within a time series must be ordered by time.
  repeated prometheus.TimeSeries timeseries = 1;
}

// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS.
// We strictly stream full series after series, optionally split by time. This means that a single frame can contain
// a partition of a single series, but once a new series starts to be streamed no more chunks will
// be sent for the previous one. Series are returned sorted in the same way TSDB blocks are internally.
message ChunkedReadResponse {
  repeated prometheus.ChunkedSeries chunked_series = 1;

  // query_index represents the index of the query from ReadRequest.queries these chunks relate to.
  int64 query_index = 2;
}
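A minimal sketch (not part of the diff) of how a client could negotiate the streamed response type, assuming the prost-generated Rust types exposed through the api crate's include_proto!("prometheus") module shown later in this diff; prost represents enum fields as i32 and nests the enum under a read_request module:

use api::prometheus::remote::read_request::ResponseType;
use api::prometheus::remote::{Query, ReadRequest};

let req = ReadRequest {
    queries: vec![Query {
        start_timestamp_ms: 1_660_000_000_000,
        end_timestamp_ms: 1_660_000_300_000,
        matchers: vec![],
        hints: None,
    }],
    // Prefer streamed chunks but also accept SAMPLES, per the FIFO rule above.
    accepted_response_types: vec![
        ResponseType::StreamedXorChunks as i32,
        ResponseType::Samples as i32,
    ],
};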
117  src/api/prometheus/remote/types.proto  Normal file
@@ -0,0 +1,117 @@
// Copyright 2017 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";
package prometheus;

option go_package = "prompb";

message MetricMetadata {
  enum MetricType {
    UNKNOWN = 0;
    COUNTER = 1;
    GAUGE = 2;
    HISTOGRAM = 3;
    GAUGEHISTOGRAM = 4;
    SUMMARY = 5;
    INFO = 6;
    STATESET = 7;
  }

  // Represents the metric type; these match the set from Prometheus.
  // Refer to model/textparse/interface.go for details.
  MetricType type = 1;
  string metric_family_name = 2;
  string help = 4;
  string unit = 5;
}

message Sample {
  double value = 1;
  // timestamp is in ms format, see model/timestamp/timestamp.go for
  // conversion from time.Time to Prometheus timestamp.
  int64 timestamp = 2;
}

message Exemplar {
  // Optional, can be empty.
  repeated Label labels = 1;
  double value = 2;
  // timestamp is in ms format, see model/timestamp/timestamp.go for
  // conversion from time.Time to Prometheus timestamp.
  int64 timestamp = 3;
}

// TimeSeries represents samples and labels for a single time series.
message TimeSeries {
  // For a timeseries to be valid, and for the samples and exemplars
  // to be ingested by the remote system properly, the labels field is required.
  repeated Label labels = 1;
  repeated Sample samples = 2;
  repeated Exemplar exemplars = 3;
}

message Label {
  string name = 1;
  string value = 2;
}

message Labels {
  repeated Label labels = 1;
}

// Matcher specifies a rule, which can match a set of labels or not.
message LabelMatcher {
  enum Type {
    EQ = 0;
    NEQ = 1;
    RE = 2;
    NRE = 3;
  }
  Type type = 1;
  string name = 2;
  string value = 3;
}

message ReadHints {
  int64 step_ms = 1;            // Query step size in milliseconds.
  string func = 2;              // String representation of surrounding function or aggregation.
  int64 start_ms = 3;           // Start time in milliseconds.
  int64 end_ms = 4;             // End time in milliseconds.
  repeated string grouping = 5; // List of label names used in aggregation.
  bool by = 6;                  // Indicates whether the grouping is `by` (true) or `without` (false).
  int64 range_ms = 7;           // Range vector selector range in milliseconds.
}

// Chunk represents a TSDB chunk.
// Time range [min, max] is inclusive.
message Chunk {
  int64 min_time_ms = 1;
  int64 max_time_ms = 2;

  // We require this to match chunkenc.Encoding.
  enum Encoding {
    UNKNOWN = 0;
    XOR = 1;
  }
  Encoding type = 3;
  bytes data = 4;
}

// ChunkedSeries represents a single, encoded time series.
message ChunkedSeries {
  // Labels should be sorted.
  repeated Label labels = 1;
  // Chunks will be in start time order and may overlap.
  repeated Chunk chunks = 2;
}
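A minimal sketch (not part of the diff) of building a remote write payload from these messages, again assuming the api crate's include_proto!("prometheus") module layout and prost's snake_case field naming:

use api::prometheus::remote::{Label, Sample, TimeSeries, WriteRequest};

fn build_write_request() -> WriteRequest {
    let series = TimeSeries {
        labels: vec![Label {
            name: "__name__".to_string(),
            value: "http_requests_total".to_string(),
        }],
        samples: vec![Sample {
            value: 1.0,
            timestamp: 1_660_000_000_000, // milliseconds since epoch
        }],
        exemplars: vec![],
    };
    WriteRequest {
        timeseries: vec![series],
        metadata: vec![],
    }
}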
@@ -1,8 +1,13 @@
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use snafu::prelude::*;

use crate::error::{self, Result};
use crate::v1::column::Values;
use crate::v1::Column;
use crate::v1::ColumnDataType;

#[derive(Debug, PartialEq, Eq)]
@@ -67,7 +72,6 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
            ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
                return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
            }
            ConcreteDataType::Geometry(_) => todo!(),
        });
        Ok(datatype)
    }
@@ -144,8 +148,47 @@ impl Values {
    }
}

impl Column {
    // The type of the pushed values must match the column's datatype.
    pub fn push_vals(&mut self, origin_count: usize, vector: VectorRef) {
        let values = self.values.get_or_insert_with(Values::default);
        let mut null_mask = BitVec::from_slice(&self.null_mask);
        let len = vector.len();
        null_mask.reserve_exact(origin_count + len);
        null_mask.extend(BitVec::repeat(false, len));

        (0..len).for_each(|idx| match vector.get(idx) {
            Value::Null => null_mask.set(idx + origin_count, true),
            Value::Boolean(val) => values.bool_values.push(val),
            Value::UInt8(val) => values.u8_values.push(val.into()),
            Value::UInt16(val) => values.u16_values.push(val.into()),
            Value::UInt32(val) => values.u32_values.push(val),
            Value::UInt64(val) => values.u64_values.push(val),
            Value::Int8(val) => values.i8_values.push(val.into()),
            Value::Int16(val) => values.i16_values.push(val.into()),
            Value::Int32(val) => values.i32_values.push(val),
            Value::Int64(val) => values.i64_values.push(val),
            Value::Float32(val) => values.f32_values.push(*val),
            Value::Float64(val) => values.f64_values.push(*val),
            Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
            Value::Binary(val) => values.binary_values.push(val.to_vec()),
            Value::Date(val) => values.date_values.push(val.val()),
            Value::DateTime(val) => values.datetime_values.push(val.val()),
            Value::Timestamp(val) => values
                .ts_millis_values
                .push(val.convert_to(TimeUnit::Millisecond)),
            Value::List(_) => unreachable!(),
        });
        self.null_mask = null_mask.into_vec();
    }
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::vectors::BooleanVector;

    use super::*;

    #[test]
@@ -359,4 +402,29 @@ mod tests {
            "Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
        );
    }

    #[test]
    fn test_column_put_vector() {
        use crate::v1::column::SemanticType;
        // Some(false), None, Some(true), Some(true)
        let mut column = Column {
            column_name: "test".to_string(),
            semantic_type: SemanticType::Field as i32,
            values: Some(Values {
                bool_values: vec![false, true, true],
                ..Default::default()
            }),
            null_mask: vec![2],
            datatype: ColumnDataType::Boolean as i32,
        };
        let row_count = 4;

        let vector = Arc::new(BooleanVector::from(vec![Some(true), None, Some(false)]));
        column.push_vals(row_count, vector);
        // Some(false), None, Some(true), Some(true), Some(true), None, Some(false)
        let bool_values = column.values.unwrap().bool_values;
        assert_eq!(vec![false, true, true, true, false], bool_values);
        let null_mask = column.null_mask;
        assert_eq!(34, null_mask[0]);
    }
}
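A note on the final assertion: the BitVec mask here packs bits least-significant-bit first (the initial mask vec![2] marks logical row 1 as null), and after the push the merged column's null slots are logical rows 1 and 5 (the two Nones), so the first mask byte is 0b0010_0010 = 34.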
@@ -1,5 +1,6 @@
pub mod error;
pub mod helper;
pub mod prometheus;
pub mod serde;
pub mod v1;
5  src/api/src/prometheus.rs  Normal file
@@ -0,0 +1,5 @@
#![allow(clippy::derive_partial_eq_without_eq)]

pub mod remote {
    tonic::include_proto!("prometheus");
}
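For include_proto! to resolve, the crate's build script must compile the protos above with tonic-build. A sketch of what that invocation presumably looks like (the actual build.rs is not part of this diff, so paths here are assumptions):

fn main() {
    tonic_build::configure()
        .compile(
            &["prometheus/remote/remote.proto"],
            // include path so `import "prometheus/remote/types.proto"` resolves
            &["."],
        )
        .unwrap();
}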
@@ -1,7 +1,7 @@
pub use prost::DecodeError;
use prost::Message;

use crate::v1::codec::{InsertBatch, PhysicalPlanNode, SelectResult};
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionId, SelectResult};

macro_rules! impl_convert_with_bytes {
    ($data_type: ty) => {
@@ -24,6 +24,7 @@ macro_rules! impl_convert_with_bytes {
impl_convert_with_bytes!(InsertBatch);
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(RegionId);

#[cfg(test)]
mod tests {
@@ -127,6 +128,16 @@ mod tests {
        );
    }

    #[test]
    fn test_convert_region_id() {
        let region_id = RegionId { id: 12 };

        let bytes: Vec<u8> = region_id.into();
        let region_id: RegionId = bytes.deref().try_into().unwrap();

        assert_eq!(12, region_id.id);
    }

    fn mock_insert_batch() -> InsertBatch {
        let values = column::Values {
            i32_values: vec![2, 3, 4, 5, 6, 7, 8],
@@ -4,3 +4,5 @@ tonic::include_proto!("greptime.v1");
pub mod codec {
    tonic::include_proto!("greptime.v1.codec");
}

pub mod meta;
54  src/api/src/v1/meta.rs  Normal file
@@ -0,0 +1,54 @@
tonic::include_proto!("greptime.v1.meta");

pub const PROTOCOL_VERSION: u64 = 1;

impl RequestHeader {
    #[inline]
    pub fn new((cluster_id, member_id): (u64, u64)) -> Self {
        Self {
            protocol_version: PROTOCOL_VERSION,
            cluster_id,
            member_id,
        }
    }
}

impl ResponseHeader {
    #[inline]
    pub fn success(cluster_id: u64) -> Self {
        Self {
            protocol_version: PROTOCOL_VERSION,
            cluster_id,
            ..Default::default()
        }
    }

    #[inline]
    pub fn failed(cluster_id: u64, error: Error) -> Self {
        Self {
            protocol_version: PROTOCOL_VERSION,
            cluster_id,
            error: Some(error),
        }
    }
}

macro_rules! gen_set_header {
    ($req: ty) => {
        impl $req {
            #[inline]
            pub fn set_header(&mut self, (cluster_id, member_id): (u64, u64)) {
                self.header = Some(RequestHeader::new((cluster_id, member_id)));
            }
        }
    };
}

gen_set_header!(HeartbeatRequest);
gen_set_header!(RouteRequest);
gen_set_header!(CreateRequest);
gen_set_header!(RangeRequest);
gen_set_header!(PutRequest);
gen_set_header!(BatchPutRequest);
gen_set_header!(CompareAndPutRequest);
gen_set_header!(DeleteRangeRequest);
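A small usage sketch (illustrative, not part of the diff) of the generated setter; prost derives Default for the included message types:

let mut req = RangeRequest::default();
req.set_header((1, 42)); // (cluster_id, member_id)
assert_eq!(
    req.header.as_ref().map(|h| h.protocol_version),
    Some(PROTOCOL_VERSION)
);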
@@ -5,21 +5,42 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
api = { path = "../api" }
arc-swap = "1.0"
async-stream = "0.3"
async-trait = "0.1"
backoff = { version = "0.4", features = ["tokio"] }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
    "simd",
] }
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util = "0.3"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
opendal = "0.17"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
table = { path = "../table" }
tokio = { version = "1.18", features = ["full"] }

[dev-dependencies]
chrono = "0.4"
log-store = { path = "../log-store" }
object-store = { path = "../object-store" }
opendal = "0.17"
storage = { path = "../storage" }
table-engine = { path = "../table-engine" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }
@@ -4,6 +4,7 @@ use common_error::ext::{BoxedError, ErrorExt};
use common_error::prelude::{Snafu, StatusCode};
use datafusion::error::DataFusionError;
use datatypes::arrow;
use datatypes::schema::RawSchema;
use snafu::{Backtrace, ErrorCompat};

#[derive(Debug, Snafu)]
@@ -103,6 +104,57 @@ pub enum Error {

    #[snafu(display("Illegal catalog manager state: {}", msg))]
    IllegalManagerState { backtrace: Backtrace, msg: String },

    #[snafu(display("Failed to scan system catalog table, source: {}", source))]
    SystemCatalogTableScan {
        #[snafu(backtrace)]
        source: table::error::Error,
    },

    #[snafu(display(
        "Invalid table schema in catalog entry, table: {}, schema: {:?}, source: {}",
        table_info,
        schema,
        source
    ))]
    InvalidTableSchema {
        table_info: String,
        schema: RawSchema,
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },

    #[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
    SystemCatalogTableScanExec {
        #[snafu(backtrace)]
        source: common_query::error::Error,
    },
    #[snafu(display("Cannot parse catalog value, source: {}", source))]
    InvalidCatalogValue {
        #[snafu(backtrace)]
        source: common_catalog::error::Error,
    },

    #[snafu(display("IO error occurred while fetching catalog info, source: {}", source))]
    Io {
        backtrace: Backtrace,
        source: std::io::Error,
    },

    #[snafu(display("Local and remote catalog data are inconsistent, msg: {}", msg))]
    CatalogStateInconsistent { msg: String, backtrace: Backtrace },

    #[snafu(display("Failed to perform metasrv operation, source: {}", source))]
    MetaSrv {
        #[snafu(backtrace)]
        source: meta_client::error::Error,
    },

    #[snafu(display("Failed to bump table id"))]
    BumpTableId { msg: String, backtrace: Backtrace },

    #[snafu(display("Failed to parse table id from metasrv, data: {:?}", data))]
    ParseTableId { data: String, backtrace: Backtrace },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -115,14 +167,17 @@ impl ErrorExt for Error {
            | Error::TableNotFound { .. }
            | Error::IllegalManagerState { .. }
            | Error::CatalogNotFound { .. }
            | Error::InvalidEntryType { .. } => StatusCode::Unexpected,
            | Error::InvalidEntryType { .. }
            | Error::CatalogStateInconsistent { .. } => StatusCode::Unexpected,

            Error::SystemCatalog { .. } | Error::EmptyValue | Error::ValueDeserialize { .. } => {
                StatusCode::StorageUnavailable
            }
            Error::SystemCatalog { .. }
            | Error::EmptyValue
            | Error::ValueDeserialize { .. }
            | Error::Io { .. } => StatusCode::StorageUnavailable,

            Error::ReadSystemCatalog { source, .. } => source.status_code(),
            Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
            Error::InvalidCatalogValue { source, .. } => source.status_code(),

            Error::RegisterTable { .. } => StatusCode::Internal,
            Error::TableExists { .. } => StatusCode::TableAlreadyExists,
@@ -132,6 +187,13 @@ impl ErrorExt for Error {
            | Error::InsertTableRecord { source, .. }
            | Error::OpenTable { source, .. }
            | Error::CreateTable { source, .. } => source.status_code(),
            Error::MetaSrv { source, .. } => source.status_code(),
            Error::SystemCatalogTableScan { source } => source.status_code(),
            Error::SystemCatalogTableScanExec { source } => source.status_code(),
            Error::InvalidTableSchema { source, .. } => source.status_code(),
            Error::BumpTableId { .. } | Error::ParseTableId { .. } => {
                StatusCode::StorageUnavailable
            }
        }
    }
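Illustrative only (an assumed call-site shape, not from the diff): how the new ParseTableId variant might be raised through snafu's generated context selector, assuming TableId is the numeric alias from table::metadata:

use snafu::OptionExt;
use table::metadata::TableId;

fn parse_table_id(data: &str) -> Result<TableId> {
    // `ok()` drops the parse error; `context` attaches the offending input.
    data.trim()
        .parse::<TableId>()
        .ok()
        .context(ParseTableIdSnafu { data })
}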
@@ -3,20 +3,21 @@
use std::any::Any;
use std::sync::Arc;

use common_telemetry::info;
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::CreateTableRequest;
use table::TableRef;

pub use crate::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
pub use crate::manager::LocalCatalogManager;
use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};

pub mod consts;
pub mod error;
mod manager;
pub mod memory;
pub mod local;
pub mod remote;
pub mod schema;
mod system;
pub mod system;
pub mod tables;

/// Represents a list of named catalogs.
@@ -31,13 +32,13 @@ pub trait CatalogList: Sync + Send {
        &self,
        name: String,
        catalog: CatalogProviderRef,
    ) -> Option<CatalogProviderRef>;
    ) -> Result<Option<CatalogProviderRef>>;

    /// Retrieves the list of available catalog names.
    fn catalog_names(&self) -> Vec<String>;
    fn catalog_names(&self) -> Result<Vec<String>>;

    /// Retrieves a specific catalog by name, provided it exists.
    fn catalog(&self, name: &str) -> Option<CatalogProviderRef>;
    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>>;
}
|
||||
@@ -47,14 +48,17 @@ pub trait CatalogProvider: Sync + Send {
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Retrieves the list of available schema names in this catalog.
|
||||
fn schema_names(&self) -> Vec<String>;
|
||||
fn schema_names(&self) -> Result<Vec<String>>;
|
||||
|
||||
/// Registers schema to this catalog.
|
||||
fn register_schema(&self, name: String, schema: SchemaProviderRef)
|
||||
-> Option<SchemaProviderRef>;
|
||||
fn register_schema(
|
||||
&self,
|
||||
name: String,
|
||||
schema: SchemaProviderRef,
|
||||
) -> Result<Option<SchemaProviderRef>>;
|
||||
|
||||
/// Retrieves a specific schema from the catalog by name, provided it exists.
|
||||
fn schema(&self, name: &str) -> Option<SchemaProviderRef>;
|
||||
fn schema(&self, name: &str) -> Result<Option<SchemaProviderRef>>;
|
||||
}
|
||||
|
||||
pub type CatalogListRef = Arc<dyn CatalogList>;
|
||||
@@ -63,32 +67,27 @@ pub type CatalogProviderRef = Arc<dyn CatalogProvider>;
#[async_trait::async_trait]
pub trait CatalogManager: CatalogList {
    /// Starts a catalog manager.
    async fn start(&self) -> error::Result<()>;
    async fn start(&self) -> Result<()>;

    /// Returns the next available table id.
    fn next_table_id(&self) -> TableId;
    async fn next_table_id(&self) -> Result<TableId>;

    /// Registers a table, given its catalog/schema, to the catalog manager;
    /// returns the number of tables registered.
    async fn register_table(&self, request: RegisterTableRequest) -> error::Result<usize>;
    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;

    /// Registers a system table; should be called before starting the manager.
    async fn register_system_table(&self, request: RegisterSystemTableRequest)
        -> error::Result<()>;

    /// Returns the table by catalog, schema and table name.
    fn table(
        &self,
        catalog: Option<&str>,
        schema: Option<&str>,
        table_name: &str,
    ) -> error::Result<Option<TableRef>>;
    fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
}

pub type CatalogManagerRef = Arc<dyn CatalogManager>;
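Note how these signatures all became fallible: a remote catalog implementation can hit IO errors on any lookup. A short sketch of a caller under the new API (catalog/schema names are the defaults from common_catalog::consts; treat the exact helper shape as illustrative):

use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};

fn find_numbers(catalogs: &CatalogListRef) -> Result<Option<TableRef>> {
    // Every step can now fail with a remote error, not just return None.
    let catalog = match catalogs.catalog(DEFAULT_CATALOG_NAME)? {
        Some(catalog) => catalog,
        None => return Ok(None),
    };
    let schema = match catalog.schema(DEFAULT_SCHEMA_NAME)? {
        Some(schema) => schema,
        None => return Ok(None),
    };
    schema.table("numbers")
}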
/// Hook called after system table opening.
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> error::Result<()> + Send + Sync>;
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> Result<()> + Send + Sync>;

/// Register system table request:
/// - When the system table is already created and registered, the hook will be called
@@ -99,9 +98,10 @@ pub struct RegisterSystemTableRequest {
    pub open_hook: Option<OpenSystemTableHook>,
}

#[derive(Clone)]
pub struct RegisterTableRequest {
    pub catalog: Option<String>,
    pub schema: Option<String>,
    pub catalog: String,
    pub schema: String,
    pub table_name: String,
    pub table_id: TableId,
    pub table: TableRef,
@@ -111,3 +111,53 @@ pub struct RegisterTableRequest {

pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
    format!("{}.{}.{}", catalog, schema, table)
}
pub trait CatalogProviderFactory {
    fn create(&self, catalog_name: String) -> CatalogProviderRef;
}

pub trait SchemaProviderFactory {
    fn create(&self, catalog_name: String, schema_name: String) -> SchemaProviderRef;
}

pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
    manager: &'a M,
    engine: TableEngineRef,
    sys_table_requests: &'a mut Vec<RegisterSystemTableRequest>,
) -> Result<()> {
    for req in sys_table_requests.drain(..) {
        let catalog_name = &req.create_table_request.catalog_name;
        let schema_name = &req.create_table_request.schema_name;
        let table_name = &req.create_table_request.table_name;
        let table_id = req.create_table_request.id;

        let table = if let Some(table) = manager.table(catalog_name, schema_name, table_name)? {
            table
        } else {
            let table = engine
                .create_table(&EngineContext::default(), req.create_table_request.clone())
                .await
                .with_context(|_| CreateTableSnafu {
                    table_info: format!(
                        "{}.{}.{}, id: {}",
                        catalog_name, schema_name, table_name, table_id,
                    ),
                })?;
            manager
                .register_table(RegisterTableRequest {
                    catalog: catalog_name.clone(),
                    schema: schema_name.clone(),
                    table_name: table_name.clone(),
                    table_id,
                    table: table.clone(),
                })
                .await?;
            info!("Created and registered system table: {}", table_name);
            table
        };
        if let Some(hook) = req.open_hook {
            (hook)(table)?;
        }
    }
    Ok(())
}
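Both catalog manager implementations now funnel pending system-table registrations through this helper: LocalCatalogManager::init below replaces its inlined loop with a single call to it, and the remote manager in src/catalog/src/remote/manager.rs imports it as well.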
7  src/catalog/src/local.rs  Normal file
@@ -0,0 +1,7 @@
pub mod manager;
pub mod memory;

pub use manager::LocalCatalogManager;
pub use memory::{
    new_memory_catalog_list, MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider,
};
@@ -2,8 +2,12 @@ use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

use common_catalog::consts::{
    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::RecordBatch;
use common_telemetry::{debug, info};
use common_telemetry::info;
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
@@ -15,25 +19,22 @@ use table::requests::OpenTableRequest;
use table::table::numbers::NumbersTable;
use table::TableRef;

use super::error::Result;
use crate::consts::{
    INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use crate::error::Result;
use crate::error::{
    CatalogNotFoundSnafu, CreateTableSnafu, IllegalManagerStateSnafu, OpenTableSnafu,
    ReadSystemCatalogSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
    SystemCatalogTypeMismatchSnafu, TableExistsSnafu, TableNotFoundSnafu,
    CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
    SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu, TableExistsSnafu,
    TableNotFoundSnafu,
};
use crate::memory::{MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider};
use crate::local::memory::{MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
    decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX,
    VALUE_INDEX,
};
use crate::tables::SystemCatalog;
use crate::{
    format_full_table_name, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
    RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, DEFAULT_CATALOG_NAME,
    DEFAULT_SCHEMA_NAME,
    format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
    CatalogProvider, CatalogProviderRef, RegisterSystemTableRequest, RegisterTableRequest,
    SchemaProvider,
};

/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -50,7 +51,7 @@ impl LocalCatalogManager {
    /// Create a new [CatalogManager] with given user catalogs and table engine
    pub async fn try_new(engine: TableEngineRef) -> Result<Self> {
        let table = SystemCatalogTable::new(engine.clone()).await?;
        let memory_catalog_list = crate::memory::new_memory_catalog_list()?;
        let memory_catalog_list = crate::local::memory::new_memory_catalog_list()?;
        let system_catalog = Arc::new(SystemCatalog::new(
            table,
            memory_catalog_list.clone(),
@@ -90,49 +91,7 @@ impl LocalCatalogManager {

        // Processing system table hooks
        let mut sys_table_requests = self.system_table_requests.lock().await;
        for req in sys_table_requests.drain(..) {
            let catalog_name = &req.create_table_request.catalog_name;
            let schema_name = &req.create_table_request.schema_name;
            let table_name = &req.create_table_request.table_name;
            let table_id = req.create_table_request.id;

            let table = if let Some(table) =
                self.table(catalog_name.as_deref(), schema_name.as_deref(), table_name)?
            {
                table
            } else {
                let table = self
                    .engine
                    .create_table(&EngineContext::default(), req.create_table_request.clone())
                    .await
                    .with_context(|_| CreateTableSnafu {
                        table_info: format!(
                            "{}.{}.{}, id: {}",
                            catalog_name.as_deref().unwrap_or(DEFAULT_CATALOG_NAME),
                            schema_name.as_deref().unwrap_or(DEFAULT_SCHEMA_NAME),
                            table_name,
                            table_id,
                        ),
                    })?;
                self.register_table(RegisterTableRequest {
                    catalog: catalog_name.clone(),
                    schema: schema_name.clone(),
                    table_name: table_name.clone(),
                    table_id,
                    table: table.clone(),
                })
                .await?;

                info!("Created and registered system table: {}", table_name);

                table
            };

            if let Some(hook) = req.open_hook {
                (hook)(table)?;
            }
        }

        handle_system_table_request(self, self.engine.clone(), &mut sys_table_requests).await?;
        Ok(())
    }
@@ -143,9 +102,9 @@ impl LocalCatalogManager {
            self.system.information_schema.system.clone(),
        )?;
        let system_catalog = Arc::new(MemoryCatalogProvider::new());
        system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema);
        system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema)?;
        self.catalogs
            .register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog);
            .register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog)?;

        let default_catalog = Arc::new(MemoryCatalogProvider::new());
        let default_schema = Arc::new(MemorySchemaProvider::new());
@@ -155,9 +114,9 @@ impl LocalCatalogManager {
        let table = Arc::new(NumbersTable::default());
        default_schema.register_table("numbers".to_string(), table)?;

        default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema);
        default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
        self.catalogs
            .register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
            .register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog)?;
        Ok(())
    }
@@ -213,18 +172,17 @@ impl LocalCatalogManager {
            Entry::Schema(s) => {
                let catalog =
                    self.catalogs
                        .catalog(&s.catalog_name)
                        .catalog(&s.catalog_name)?
                        .context(CatalogNotFoundSnafu {
                            catalog_name: &s.catalog_name,
                        })?;
                catalog.register_schema(
                    s.schema_name.clone(),
                    Arc::new(MemorySchemaProvider::new()),
                );
                )?;
                info!("Registered schema: {:?}", s);
            }
            Entry::Table(t) => {
                debug!("t: {:?}", t);
                self.open_and_register_table(&t).await?;
                info!("Registered table: {:?}", t);
                max_table_id = max_table_id.max(t.table_id);
|
||||
async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> {
|
||||
let catalog = self
|
||||
.catalogs
|
||||
.catalog(&t.catalog_name)
|
||||
.catalog(&t.catalog_name)?
|
||||
.context(CatalogNotFoundSnafu {
|
||||
catalog_name: &t.catalog_name,
|
||||
})?;
|
||||
let schema = catalog
|
||||
.schema(&t.schema_name)
|
||||
.schema(&t.schema_name)?
|
||||
.context(SchemaNotFoundSnafu {
|
||||
schema_info: format!("{}.{}", &t.catalog_name, &t.schema_name),
|
||||
})?;
|
||||
@@ -286,19 +244,19 @@ impl CatalogList for LocalCatalogManager {
        &self,
        name: String,
        catalog: CatalogProviderRef,
    ) -> Option<Arc<dyn CatalogProvider>> {
    ) -> Result<Option<CatalogProviderRef>> {
        self.catalogs.register_catalog(name, catalog)
    }

    fn catalog_names(&self) -> Vec<String> {
        let mut res = self.catalogs.catalog_names();
    fn catalog_names(&self) -> Result<Vec<String>> {
        let mut res = self.catalogs.catalog_names()?;
        res.push(SYSTEM_CATALOG_NAME.to_string());
        res
        Ok(res)
    }

    fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
        if name.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
            Some(self.system.clone())
            Ok(Some(self.system.clone()))
        } else {
            self.catalogs.catalog(name)
        }
@@ -307,15 +265,15 @@ impl CatalogList for LocalCatalogManager {

#[async_trait::async_trait]
impl CatalogManager for LocalCatalogManager {
    /// Start [MemoryCatalogManager] to load all information from system catalog table.
    /// Start [LocalCatalogManager] to load all information from system catalog table.
    /// Make sure table engine is initialized before starting [LocalCatalogManager].
    async fn start(&self) -> Result<()> {
        self.init().await
    }

    #[inline]
    fn next_table_id(&self) -> TableId {
        self.next_table_id.fetch_add(1, Ordering::Relaxed)
    async fn next_table_id(&self) -> Result<TableId> {
        Ok(self.next_table_id.fetch_add(1, Ordering::Relaxed))
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
@@ -328,36 +286,30 @@ impl CatalogManager for LocalCatalogManager {
            }
        );

        let catalog_name = request
            .catalog
            .unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
        let schema_name = request
            .schema
            .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
        let catalog_name = &request.catalog;
        let schema_name = &request.schema;

        let catalog = self
            .catalogs
            .catalog(&catalog_name)
            .context(CatalogNotFoundSnafu {
                catalog_name: &catalog_name,
            })?;
            .catalog(catalog_name)?
            .context(CatalogNotFoundSnafu { catalog_name })?;
        let schema = catalog
            .schema(&schema_name)
            .schema(schema_name)?
            .with_context(|| SchemaNotFoundSnafu {
                schema_info: format!("{}.{}", catalog_name, schema_name),
            })?;

        if schema.table_exist(&request.table_name) {
        if schema.table_exist(&request.table_name)? {
            return TableExistsSnafu {
                table: format_full_table_name(&catalog_name, &schema_name, &request.table_name),
                table: format_full_table_name(catalog_name, schema_name, &request.table_name),
            }
            .fail();
        }

        self.system
            .register_table(
                catalog_name,
                schema_name,
                catalog_name.clone(),
                schema_name.clone(),
                request.table_name.clone(),
                request.table_id,
            )
@@ -383,22 +335,19 @@ impl CatalogManager for LocalCatalogManager {

    fn table(
        &self,
        catalog: Option<&str>,
        schema: Option<&str>,
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
    ) -> Result<Option<TableRef>> {
        let catalog_name = catalog.unwrap_or(DEFAULT_CATALOG_NAME);
        let schema_name = schema.unwrap_or(DEFAULT_SCHEMA_NAME);

        let catalog = self
            .catalogs
            .catalog(catalog_name)
            .catalog(catalog_name)?
            .context(CatalogNotFoundSnafu { catalog_name })?;
        let schema = catalog
            .schema(schema_name)
            .schema(schema_name)?
            .with_context(|| SchemaNotFoundSnafu {
                schema_info: format!("{}.{}", catalog_name, schema_name),
            })?;
        Ok(schema.table(table_name))
        schema.table(table_name)
    }
}
@@ -23,7 +23,7 @@ impl MemoryCatalogList {
    pub fn register_catalog_if_absent(
        &self,
        name: String,
        catalog: Arc<dyn CatalogProvider>,
        catalog: CatalogProviderRef,
    ) -> Option<CatalogProviderRef> {
        let mut catalogs = self.catalogs.write().unwrap();
        let entry = catalogs.entry(name);
@@ -46,19 +46,19 @@ impl CatalogList for MemoryCatalogList {
        &self,
        name: String,
        catalog: CatalogProviderRef,
    ) -> Option<CatalogProviderRef> {
    ) -> Result<Option<CatalogProviderRef>> {
        let mut catalogs = self.catalogs.write().unwrap();
        catalogs.insert(name, catalog)
        Ok(catalogs.insert(name, catalog))
    }

    fn catalog_names(&self) -> Vec<String> {
    fn catalog_names(&self) -> Result<Vec<String>> {
        let catalogs = self.catalogs.read().unwrap();
        catalogs.keys().map(|s| s.to_string()).collect()
        Ok(catalogs.keys().map(|s| s.to_string()).collect())
    }

    fn catalog(&self, name: &str) -> Option<CatalogProviderRef> {
    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
        let catalogs = self.catalogs.read().unwrap();
        catalogs.get(name).cloned()
        Ok(catalogs.get(name).cloned())
    }
}

@@ -87,23 +87,23 @@ impl CatalogProvider for MemoryCatalogProvider {
        self
    }

    fn schema_names(&self) -> Vec<String> {
    fn schema_names(&self) -> Result<Vec<String>> {
        let schemas = self.schemas.read().unwrap();
        schemas.keys().cloned().collect()
        Ok(schemas.keys().cloned().collect())
    }

    fn register_schema(
        &self,
        name: String,
        schema: SchemaProviderRef,
    ) -> Option<SchemaProviderRef> {
    ) -> Result<Option<SchemaProviderRef>> {
        let mut schemas = self.schemas.write().unwrap();
        schemas.insert(name, schema)
        Ok(schemas.insert(name, schema))
    }

    fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
    fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
        let schemas = self.schemas.read().unwrap();
        schemas.get(name).cloned()
        Ok(schemas.get(name).cloned())
    }
}

@@ -132,18 +132,18 @@ impl SchemaProvider for MemorySchemaProvider {
        self
    }

    fn table_names(&self) -> Vec<String> {
    fn table_names(&self) -> Result<Vec<String>> {
        let tables = self.tables.read().unwrap();
        tables.keys().cloned().collect()
        Ok(tables.keys().cloned().collect())
    }

    fn table(&self, name: &str) -> Option<TableRef> {
    fn table(&self, name: &str) -> Result<Option<TableRef>> {
        let tables = self.tables.read().unwrap();
        tables.get(name).cloned()
        Ok(tables.get(name).cloned())
    }

    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
        if self.table_exist(name.as_str()) {
        if self.table_exist(name.as_str())? {
            return TableExistsSnafu { table: name }.fail()?;
        }
        let mut tables = self.tables.write().unwrap();
@@ -155,9 +155,9 @@ impl SchemaProvider for MemorySchemaProvider {
        Ok(tables.remove(name))
    }

    fn table_exist(&self, name: &str) -> bool {
    fn table_exist(&self, name: &str) -> Result<bool> {
        let tables = self.tables.read().unwrap();
        tables.contains_key(name)
        Ok(tables.contains_key(name))
    }
}
@@ -168,40 +168,50 @@ pub fn new_memory_catalog_list() -> Result<Arc<MemoryCatalogList>> {

#[cfg(test)]
mod tests {
    use common_catalog::consts::*;
    use common_error::ext::ErrorExt;
    use common_error::prelude::StatusCode;
    use table::table::numbers::NumbersTable;

    use super::*;
    use crate::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};

    #[test]
    fn test_new_memory_catalog_list() {
        let catalog_list = new_memory_catalog_list().unwrap();

        assert!(catalog_list.catalog(DEFAULT_CATALOG_NAME).is_none());
        assert!(catalog_list
            .catalog(DEFAULT_CATALOG_NAME)
            .unwrap()
            .is_none());
        let default_catalog = Arc::new(MemoryCatalogProvider::default());
        catalog_list.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog.clone());
        catalog_list
            .register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog.clone())
            .unwrap();

        assert!(default_catalog.schema(DEFAULT_SCHEMA_NAME).is_none());
        assert!(default_catalog
            .schema(DEFAULT_SCHEMA_NAME)
            .unwrap()
            .is_none());
        let default_schema = Arc::new(MemorySchemaProvider::default());
        default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone());
        default_catalog
            .register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())
            .unwrap();

        default_schema
            .register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
            .unwrap();

        let table = default_schema.table("numbers");
        let table = default_schema.table("numbers").unwrap();
        assert!(table.is_some());

        assert!(default_schema.table("not_exists").is_none());
        assert!(default_schema.table("not_exists").unwrap().is_none());
    }

    #[tokio::test]
    async fn test_mem_provider() {
        let provider = MemorySchemaProvider::new();
        let table_name = "numbers";
        assert!(!provider.table_exist(table_name));
        assert!(!provider.table_exist(table_name).unwrap());
        assert!(provider.deregister_table(table_name).unwrap().is_none());
        let test_table = NumbersTable::default();
        // register table successfully
@@ -209,7 +219,7 @@ mod tests {
            .register_table(table_name.to_string(), Arc::new(test_table))
            .unwrap()
            .is_none());
        assert!(provider.table_exist(table_name));
        assert!(provider.table_exist(table_name).unwrap());
        let other_table = NumbersTable::default();
        let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
        let err = result.err().unwrap();
117  src/catalog/src/remote.rs  Normal file
@@ -0,0 +1,117 @@
use std::fmt::Debug;
use std::pin::Pin;
use std::sync::Arc;

pub use client::MetaKvBackend;
use futures::Stream;
use futures_util::StreamExt;
pub use manager::{RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider};

use crate::error::Error;

mod client;
mod manager;

#[derive(Debug, Clone)]
pub struct Kv(pub Vec<u8>, pub Vec<u8>);

pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;

#[async_trait::async_trait]
pub trait KvBackend: Send + Sync {
    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
    where
        'a: 'b;

    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;

    /// Compares and sets the value of a key. `expect` is the expected value; if the backend's
    /// current value associated with the key is the same as `expect`, the value will be updated
    /// to `val`.
    ///
    /// - If the compare-and-set operation successfully updated the value, this method returns `Ok(Ok(()))`.
    /// - If the associated value is not the same as `expect`, no value is updated and an
    ///   `Ok(Err(Option<Vec<u8>>))` is returned; the inner `Err` carries the current value
    ///   associated with the key.
    /// - If any error happens during the operation, an `Err(Error)` is returned.
    async fn compare_and_set(
        &self,
        key: &[u8],
        expect: &[u8],
        val: &[u8],
    ) -> Result<Result<(), Option<Vec<u8>>>, Error>;

    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;

    async fn delete(&self, key: &[u8]) -> Result<(), Error> {
        self.delete_range(key, &[]).await
    }

    /// The default `get` is implemented on top of the `range` method.
    async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
        let mut iter = self.range(key);
        while let Some(r) = iter.next().await {
            let kv = r?;
            if kv.0 == key {
                return Ok(Some(kv));
            }
        }
        Ok(None)
    }
}

pub type KvBackendRef = Arc<dyn KvBackend>;
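A sketch (assumed usage, not from the diff; next_value is a hypothetical helper) of the read-modify-write loop the compare_and_set contract above enables:

async fn update(backend: &dyn KvBackend, key: &[u8]) -> Result<(), Error> {
    loop {
        // Read the current value; an absent key is treated as empty bytes here.
        let current = backend.get(key).await?.map(|kv| kv.1).unwrap_or_default();
        let next = next_value(&current); // hypothetical: compute the new value
        match backend.compare_and_set(key, &current, &next).await? {
            Ok(()) => return Ok(()),  // updated atomically
            Err(_actual) => continue, // lost a race; re-read and retry
        }
    }
}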
#[cfg(test)]
mod tests {
    use async_stream::stream;

    use super::*;

    struct MockKvBackend {}

    #[async_trait::async_trait]
    impl KvBackend for MockKvBackend {
        fn range<'a, 'b>(&'a self, _key: &[u8]) -> ValueIter<'b, Error>
        where
            'a: 'b,
        {
            Box::pin(stream!({
                for i in 0..3 {
                    yield Ok(Kv(
                        i.to_string().as_bytes().to_vec(),
                        i.to_string().as_bytes().to_vec(),
                    ))
                }
            }))
        }

        async fn set(&self, _key: &[u8], _val: &[u8]) -> Result<(), Error> {
            unimplemented!()
        }

        async fn compare_and_set(
            &self,
            _key: &[u8],
            _expect: &[u8],
            _val: &[u8],
        ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
            unimplemented!()
        }

        async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
            unimplemented!()
        }
    }

    #[tokio::test]
    async fn test_get() {
        let backend = MockKvBackend {};
        let result = backend.get(0.to_string().as_bytes()).await;
        assert_eq!(0.to_string().as_bytes(), result.unwrap().unwrap().0);
        let result = backend.get(1.to_string().as_bytes()).await;
        assert_eq!(1.to_string().as_bytes(), result.unwrap().unwrap().0);
        let result = backend.get(2.to_string().as_bytes()).await;
        assert_eq!(2.to_string().as_bytes(), result.unwrap().unwrap().0);
        let result = backend.get(3.to_string().as_bytes()).await;
        assert!(result.unwrap().is_none());
    }
}
93  src/catalog/src/remote/client.rs  Normal file
@@ -0,0 +1,93 @@
use std::fmt::Debug;

use async_stream::stream;
use common_telemetry::info;
use meta_client::client::MetaClient;
use meta_client::rpc::{CompareAndPutRequest, DeleteRangeRequest, PutRequest, RangeRequest};
use snafu::ResultExt;

use crate::error::{Error, MetaSrvSnafu};
use crate::remote::{Kv, KvBackend, ValueIter};

#[derive(Debug)]
pub struct MetaKvBackend {
    pub client: MetaClient,
}

/// Implement the `KvBackend` trait for `MetaKvBackend` instead of opendal's `Accessor`, since
/// `MetaClient`'s range method can return both keys and values, which reduces IO overhead
/// compared to `Accessor`'s list and get methods.
#[async_trait::async_trait]
impl KvBackend for MetaKvBackend {
    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
    where
        'a: 'b,
    {
        let key = key.to_vec();
        Box::pin(stream!({
            let mut resp = self
                .client
                .range(RangeRequest::new().with_prefix(key))
                .await
                .context(MetaSrvSnafu)?;
            let kvs = resp.take_kvs();
            for mut kv in kvs.into_iter() {
                yield Ok(Kv(kv.take_key(), kv.take_value()))
            }
        }))
    }

    async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
        let mut response = self
            .client
            .range(RangeRequest::new().with_key(key))
            .await
            .context(MetaSrvSnafu)?;
        Ok(response
            .take_kvs()
            .get_mut(0)
            .map(|kv| Kv(kv.take_key(), kv.take_value())))
    }

    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
        let req = PutRequest::new()
            .with_key(key.to_vec())
            .with_value(val.to_vec());
        let _ = self.client.put(req).await.context(MetaSrvSnafu)?;
        Ok(())
    }

    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
        let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
        let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
        info!(
            "Delete range, key: {}, end: {}, deleted: {}",
            String::from_utf8_lossy(key),
            String::from_utf8_lossy(end),
            resp.deleted()
        );

        Ok(())
    }

    async fn compare_and_set(
        &self,
        key: &[u8],
        expect: &[u8],
        val: &[u8],
    ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
        let request = CompareAndPutRequest::new()
            .with_key(key.to_vec())
            .with_expect(expect.to_vec())
            .with_value(val.to_vec());
        let mut response = self
            .client
            .compare_and_put(request)
            .await
            .context(MetaSrvSnafu)?;
        if response.is_success() {
            Ok(Ok(()))
        } else {
            Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
        }
    }
}
749  src/catalog/src/remote/manager.rs  Normal file
@@ -0,0 +1,749 @@
use std::any::Any;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;

use arc_swap::ArcSwap;
use async_stream::stream;
use backoff::exponential::ExponentialBackoffBuilder;
use backoff::ExponentialBackoff;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
    build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
    SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use common_telemetry::{debug, error, info};
use futures::Stream;
use futures_util::StreamExt;
use snafu::{OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::{CreateTableRequest, OpenTableRequest};
use table::table::numbers::NumbersTable;
use table::TableRef;
use tokio::sync::Mutex;

use crate::error::{
    BumpTableIdSnafu, CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu,
    OpenTableSnafu, ParseTableIdSnafu, SchemaNotFoundSnafu, TableExistsSnafu,
};
use crate::error::{InvalidTableSchemaSnafu, Result};
use crate::remote::{Kv, KvBackendRef};
use crate::{
    handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
    RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};

/// Catalog manager based on metasrv.
pub struct RemoteCatalogManager {
    node_id: u64,
    backend: KvBackendRef,
    catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
    engine: TableEngineRef,
    system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
    mutex: Arc<Mutex<()>>,
}

impl RemoteCatalogManager {
    pub fn new(engine: TableEngineRef, node_id: u64, backend: KvBackendRef) -> Self {
        Self {
            engine,
            node_id,
            backend,
            catalogs: Default::default(),
            system_table_requests: Default::default(),
            mutex: Default::default(),
        }
    }
    fn build_catalog_key(&self, catalog_name: impl AsRef<str>) -> CatalogKey {
        CatalogKey {
            catalog_name: catalog_name.as_ref().to_string(),
        }
    }

    fn new_catalog_provider(&self, catalog_name: &str) -> CatalogProviderRef {
        Arc::new(RemoteCatalogProvider {
            catalog_name: catalog_name.to_string(),
            backend: self.backend.clone(),
            schemas: Default::default(),
            mutex: Default::default(),
        }) as _
    }

    fn new_schema_provider(&self, catalog_name: &str, schema_name: &str) -> SchemaProviderRef {
        Arc::new(RemoteSchemaProvider {
            catalog_name: catalog_name.to_string(),
            schema_name: schema_name.to_string(),
            tables: Default::default(),
            node_id: self.node_id,
            backend: self.backend.clone(),
            mutex: Default::default(),
        }) as _
    }

    async fn iter_remote_catalogs(
        &self,
    ) -> Pin<Box<dyn Stream<Item = Result<CatalogKey>> + Send + '_>> {
        let catalog_range_prefix = build_catalog_prefix();
        info!("catalog_range_prefix: {}", catalog_range_prefix);
        let mut catalogs = self.backend.range(catalog_range_prefix.as_bytes());
        Box::pin(stream!({
            while let Some(r) = catalogs.next().await {
                let Kv(k, _) = r?;
                if !k.starts_with(catalog_range_prefix.as_bytes()) {
                    debug!("Ignoring non-catalog key: {}", String::from_utf8_lossy(&k));
                    continue;
                }
                let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
                    .context(InvalidCatalogValueSnafu)?;
                yield Ok(key)
            }
        }))
    }

    async fn iter_remote_schemas(
        &self,
        catalog_name: &str,
    ) -> Pin<Box<dyn Stream<Item = Result<SchemaKey>> + Send + '_>> {
        let schema_prefix = build_schema_prefix(catalog_name);
        let mut schemas = self.backend.range(schema_prefix.as_bytes());

        Box::pin(stream!({
            while let Some(r) = schemas.next().await {
                let Kv(k, _) = r?;
                if !k.starts_with(schema_prefix.as_bytes()) {
                    debug!("Ignoring non-schema key: {}", String::from_utf8_lossy(&k));
                    continue;
                }

                let schema_key = SchemaKey::parse(&String::from_utf8_lossy(&k))
                    .context(InvalidCatalogValueSnafu)?;
                yield Ok(schema_key)
            }
        }))
    }
    /// Iterates over all table entries on metasrv.
    /// TODO(hl): table entries with different versions are not currently considered.
    /// Ideally, a deprecated table entry must be deleted when it is deregistered from the catalog.
    async fn iter_remote_tables(
        &self,
        catalog_name: &str,
        schema_name: &str,
    ) -> Pin<Box<dyn Stream<Item = Result<(TableGlobalKey, TableGlobalValue)>> + Send + '_>> {
        let table_prefix = build_table_global_prefix(catalog_name, schema_name);
        let mut tables = self.backend.range(table_prefix.as_bytes());
        Box::pin(stream!({
            while let Some(r) = tables.next().await {
                let Kv(k, v) = r?;
                if !k.starts_with(table_prefix.as_bytes()) {
                    debug!("Ignoring non-table prefix: {}", String::from_utf8_lossy(&k));
                    continue;
                }
                let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
                    .context(InvalidCatalogValueSnafu)?;
                let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
                    .context(InvalidCatalogValueSnafu)?;

                debug!(
                    "Found catalog table entry, key: {}, value: {:?}",
                    table_key, table_value
                );
                // metasrv has allocated region ids to the current datanode
                if table_value
                    .regions_id_map
                    .get(&self.node_id)
                    .map(|v| !v.is_empty())
                    .unwrap_or(false)
                {
                    yield Ok((table_key, table_value))
                }
            }
        }))
    }

    /// Fetches catalogs/schemas/tables from the remote catalog manager along with the max table id allocated.
    async fn initiate_catalogs(&self) -> Result<(HashMap<String, CatalogProviderRef>, TableId)> {
        let mut res = HashMap::new();
        let max_table_id = MIN_USER_TABLE_ID - 1;

        // initiate default catalog and schema
        let default_catalog = self.initiate_default_catalog().await?;
        res.insert(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
        info!("Default catalog and schema registered");

        let mut catalogs = self.iter_remote_catalogs().await;
        while let Some(r) = catalogs.next().await {
            let CatalogKey { catalog_name, .. } = r?;
            info!("Fetch catalog from metasrv: {}", catalog_name);
            let catalog = res
                .entry(catalog_name.clone())
                .or_insert_with(|| self.new_catalog_provider(&catalog_name))
                .clone();

            self.initiate_schemas(catalog_name, catalog, max_table_id)
                .await?;
        }

        Ok((res, max_table_id))
    }
async fn initiate_schemas(
|
||||
&self,
|
||||
catalog_name: String,
|
||||
catalog: CatalogProviderRef,
|
||||
max_table_id: TableId,
|
||||
) -> Result<()> {
|
||||
let mut schemas = self.iter_remote_schemas(&catalog_name).await;
|
||||
while let Some(r) = schemas.next().await {
|
||||
let SchemaKey {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
..
|
||||
} = r?;
|
||||
info!("Found schema: {}.{}", catalog_name, schema_name);
|
||||
let schema = match catalog.schema(&schema_name)? {
|
||||
None => {
|
||||
let schema = self.new_schema_provider(&catalog_name, &schema_name);
|
||||
catalog.register_schema(schema_name.clone(), schema.clone())?;
|
||||
info!("Registered schema: {}", &schema_name);
|
||||
schema
|
||||
}
|
||||
Some(schema) => schema,
|
||||
};
|
||||
|
||||
info!(
|
||||
"Fetch schema from metasrv: {}.{}",
|
||||
&catalog_name, &schema_name
|
||||
);
|
||||
self.initiate_tables(&catalog_name, &schema_name, schema, max_table_id)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Initiates all tables inside a catalog by fetching data from metasrv.
|
||||
async fn initiate_tables<'a>(
|
||||
&'a self,
|
||||
catalog_name: &'a str,
|
||||
schema_name: &'a str,
|
||||
schema: SchemaProviderRef,
|
||||
mut max_table_id: TableId,
|
||||
) -> Result<()> {
|
||||
let mut tables = self.iter_remote_tables(catalog_name, schema_name).await;
|
||||
while let Some(r) = tables.next().await {
|
||||
let (table_key, table_value) = r?;
|
||||
let table_ref = self.open_or_create_table(&table_key, &table_value).await?;
|
||||
schema.register_table(table_key.table_name.to_string(), table_ref)?;
|
||||
info!("Registered table {}", &table_key.table_name);
|
||||
if table_value.id > max_table_id {
|
||||
info!("Max table id: {} -> {}", max_table_id, table_value.id);
|
||||
max_table_id = table_value.id;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn initiate_default_catalog(&self) -> Result<CatalogProviderRef> {
|
||||
let default_catalog = self.new_catalog_provider(DEFAULT_CATALOG_NAME);
|
||||
let default_schema = self.new_schema_provider(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
|
||||
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())?;
|
||||
let schema_key = SchemaKey {
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
}
|
||||
.to_string();
|
||||
self.backend
|
||||
.set(
|
||||
schema_key.as_bytes(),
|
||||
&SchemaValue {}
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
info!("Registered default schema");
|
||||
|
||||
let catalog_key = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
}
|
||||
.to_string();
|
||||
self.backend
|
||||
.set(
|
||||
catalog_key.as_bytes(),
|
||||
&CatalogValue {}
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
info!("Registered default catalog");
|
||||
Ok(default_catalog)
|
||||
}
|
||||
|
||||
async fn open_or_create_table(
|
||||
&self,
|
||||
table_key: &TableGlobalKey,
|
||||
table_value: &TableGlobalValue,
|
||||
) -> Result<TableRef> {
|
||||
let context = EngineContext {};
|
||||
let TableGlobalKey {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
table_name,
|
||||
..
|
||||
} = table_key;
|
||||
|
||||
let TableGlobalValue {
|
||||
id,
|
||||
meta,
|
||||
regions_id_map,
|
||||
..
|
||||
} = table_value;
|
||||
|
||||
let request = OpenTableRequest {
|
||||
catalog_name: catalog_name.clone(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.clone(),
|
||||
table_id: *id,
|
||||
};
|
||||
match self
|
||||
.engine
|
||||
.open_table(&context, request)
|
||||
.await
|
||||
.with_context(|_| OpenTableSnafu {
|
||||
table_info: format!("{}.{}.{}, id:{}", catalog_name, schema_name, table_name, id,),
|
||||
})? {
|
||||
Some(table) => Ok(table),
|
||||
None => {
|
||||
let schema = meta
|
||||
.schema
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(InvalidTableSchemaSnafu {
|
||||
table_info: format!("{}.{}.{}", catalog_name, schema_name, table_name,),
|
||||
schema: meta.schema.clone(),
|
||||
})?;
|
||||
let req = CreateTableRequest {
|
||||
id: *id,
|
||||
catalog_name: catalog_name.clone(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.clone(),
|
||||
desc: None,
|
||||
schema: Arc::new(schema),
|
||||
region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because region_id_map is checked in `iter_remote_tables`
|
||||
primary_key_indices: meta.primary_key_indices.clone(),
|
||||
create_if_not_exists: true,
|
||||
table_options: meta.options.clone(),
|
||||
};
|
||||
|
||||
self.engine
|
||||
.create_table(&context, req)
|
||||
.await
|
||||
.context(CreateTableSnafu {
|
||||
table_info: format!(
|
||||
"{}.{}.{}, id:{}",
|
||||
&catalog_name, &schema_name, &table_name, id
|
||||
),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}

#[async_trait::async_trait]
impl CatalogManager for RemoteCatalogManager {
    async fn start(&self) -> Result<()> {
        let (catalogs, max_table_id) = self.initiate_catalogs().await?;
        info!(
            "Initialized catalogs: {:?}",
            catalogs.keys().cloned().collect::<Vec<_>>()
        );
        self.catalogs.store(Arc::new(catalogs));
        info!("Max table id allocated: {}", max_table_id);

        let mut system_table_requests = self.system_table_requests.lock().await;
        handle_system_table_request(self, self.engine.clone(), &mut system_table_requests).await?;
        info!("All system tables opened");

        self.catalog(DEFAULT_CATALOG_NAME)
            .unwrap()
            .unwrap()
            .schema(DEFAULT_SCHEMA_NAME)
            .unwrap()
            .unwrap()
            .register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
            .unwrap();
        Ok(())
    }

    /// Bump the table id in a CAS manner with backoff.
    async fn next_table_id(&self) -> Result<TableId> {
        let key = common_catalog::consts::TABLE_ID_KEY_PREFIX.as_bytes();
        let op = || async {
            // TODO(hl): optimize this get
            let (prev, prev_bytes) = match self.backend.get(key).await? {
                None => (MIN_USER_TABLE_ID, vec![]),
                Some(kv) => (parse_table_id(&kv.1)?, kv.1),
            };

            match self
                .backend
                .compare_and_set(key, &prev_bytes, &(prev + 1).to_le_bytes())
                .await
            {
                Ok(cas_res) => match cas_res {
                    Ok(_) => Ok(prev),
                    Err(e) => {
                        info!("Table id {:?} already occupied", e);
                        Err(backoff::Error::transient(
                            BumpTableIdSnafu {
                                msg: "Table id occupied",
                            }
                            .build(),
                        ))
                    }
                },
                Err(e) => {
                    error!(e; "Failed to CAS table id");
                    Err(backoff::Error::permanent(
                        BumpTableIdSnafu {
                            msg: format!("Failed to perform CAS operation: {:?}", e),
                        }
                        .build(),
                    ))
                }
            }
        };

        let retry_policy: ExponentialBackoff = ExponentialBackoffBuilder::new()
            .with_initial_interval(Duration::from_millis(4))
            .with_multiplier(2.0)
            .with_max_interval(Duration::from_millis(1000))
            .with_max_elapsed_time(Some(Duration::from_millis(3000)))
            .build();

        backoff::future::retry(retry_policy, op).await.map_err(|e| {
            BumpTableIdSnafu {
                msg: format!(
                    "Bump table id exceeds max fail times, last error msg: {:?}",
                    e
                ),
            }
            .build()
        })
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
        let catalog_name = request.catalog;
        let schema_name = request.schema;
        let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
            catalog_name: &catalog_name,
        })?;
        let schema_provider =
            catalog_provider
                .schema(&schema_name)?
                .with_context(|| SchemaNotFoundSnafu {
                    schema_info: format!("{}.{}", &catalog_name, &schema_name),
                })?;
        if schema_provider.table_exist(&request.table_name)? {
            return TableExistsSnafu {
                table: format!("{}.{}.{}", &catalog_name, &schema_name, &request.table_name),
            }
            .fail();
        }
        schema_provider.register_table(request.table_name, request.table)?;
        Ok(1)
    }

    async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
        let mut requests = self.system_table_requests.lock().await;
        requests.push(request);
        Ok(())
    }

    fn table(
        &self,
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
    ) -> Result<Option<TableRef>> {
        let catalog = self
            .catalog(catalog_name)?
            .with_context(|| CatalogNotFoundSnafu { catalog_name })?;
        let schema = catalog
            .schema(schema_name)?
            .with_context(|| SchemaNotFoundSnafu {
                schema_info: format!("{}.{}", catalog_name, schema_name),
            })?;
        schema.table(table_name)
    }
}
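
// A minimal standalone sketch (not the project's API) of the compare-and-set
// loop that `next_table_id` runs against the KV backend, reduced to plain
// `std` atomics: read the current value, publish `prev + 1` only if nobody
// raced us in between, and retry on contention. The transient backoff error
// above corresponds to the `Err` branch here.
fn next_id_sketch(counter: &std::sync::atomic::AtomicU32) -> u32 {
    use std::sync::atomic::Ordering;
    loop {
        let prev = counter.load(Ordering::SeqCst);
        match counter.compare_exchange(prev, prev + 1, Ordering::SeqCst, Ordering::SeqCst) {
            // we won the race: the value we read is our allocated id
            Ok(_) => return prev,
            // someone else bumped the counter first; re-read and retry
            Err(_) => continue,
        }
    }
}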

impl CatalogList for RemoteCatalogManager {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn register_catalog(
        &self,
        name: String,
        catalog: CatalogProviderRef,
    ) -> Result<Option<CatalogProviderRef>> {
        let key = self.build_catalog_key(&name).to_string();
        let backend = self.backend.clone();
        let mutex = self.mutex.clone();
        let catalogs = self.catalogs.clone();

        std::thread::spawn(|| {
            common_runtime::block_on_write(async move {
                let _guard = mutex.lock().await;
                backend
                    .set(
                        key.as_bytes(),
                        &CatalogValue {}
                            .as_bytes()
                            .context(InvalidCatalogValueSnafu)?,
                    )
                    .await?;
                let prev_catalogs = catalogs.load();
                let mut new_catalogs = HashMap::with_capacity(prev_catalogs.len() + 1);
                new_catalogs.clone_from(&prev_catalogs);
                let prev = new_catalogs.insert(name, catalog);
                catalogs.store(Arc::new(new_catalogs));
                Ok(prev)
            })
        })
        .join()
        .unwrap()
    }

    /// List all catalogs from metasrv
    fn catalog_names(&self) -> Result<Vec<String>> {
        Ok(self.catalogs.load().keys().cloned().collect::<Vec<_>>())
    }

    /// Read catalog info of given name from metasrv.
    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
        Ok(self.catalogs.load().get(name).cloned())
    }
}
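
// The sync-over-async bridge used by `register_catalog` above, in isolation:
// the blocking trait method spawns a plain OS thread so that blocking on the
// future never happens inside the caller's async runtime (which could
// deadlock), then joins the thread for the result. A sketch with
// `futures::executor::block_on` standing in for `common_runtime::block_on_write`:
fn bridge_sketch<T: Send + 'static>(
    fut: impl std::future::Future<Output = T> + Send + 'static,
) -> T {
    std::thread::spawn(move || futures::executor::block_on(fut))
        .join()
        .expect("bridge thread panicked")
}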

pub struct RemoteCatalogProvider {
    catalog_name: String,
    backend: KvBackendRef,
    schemas: Arc<ArcSwap<HashMap<String, SchemaProviderRef>>>,
    mutex: Arc<Mutex<()>>,
}

impl RemoteCatalogProvider {
    pub fn new(catalog_name: String, backend: KvBackendRef) -> Self {
        Self {
            catalog_name,
            backend,
            schemas: Default::default(),
            mutex: Default::default(),
        }
    }

    fn build_schema_key(&self, schema_name: impl AsRef<str>) -> SchemaKey {
        SchemaKey {
            catalog_name: self.catalog_name.clone(),
            schema_name: schema_name.as_ref().to_string(),
        }
    }
}

impl CatalogProvider for RemoteCatalogProvider {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn schema_names(&self) -> Result<Vec<String>> {
        Ok(self.schemas.load().keys().cloned().collect::<Vec<_>>())
    }

    fn register_schema(
        &self,
        name: String,
        schema: SchemaProviderRef,
    ) -> Result<Option<SchemaProviderRef>> {
        let key = self.build_schema_key(&name).to_string();
        let backend = self.backend.clone();
        let mutex = self.mutex.clone();
        let schemas = self.schemas.clone();

        std::thread::spawn(|| {
            common_runtime::block_on_write(async move {
                let _guard = mutex.lock().await;
                backend
                    .set(
                        key.as_bytes(),
                        &SchemaValue {}
                            .as_bytes()
                            .context(InvalidCatalogValueSnafu)?,
                    )
                    .await?;

                let prev_schemas = schemas.load();
                let mut new_schemas = HashMap::with_capacity(prev_schemas.len() + 1);
                new_schemas.clone_from(&prev_schemas);
                let prev_schema = new_schemas.insert(name, schema);
                schemas.store(Arc::new(new_schemas));
                Ok(prev_schema)
            })
        })
        .join()
        .unwrap()
    }

    fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
        Ok(self.schemas.load().get(name).cloned())
    }
}

/// Parse a u8 slice into a `TableId`.
fn parse_table_id(val: &[u8]) -> Result<TableId> {
    Ok(TableId::from_le_bytes(val.try_into().map_err(|_| {
        ParseTableIdSnafu {
            data: format!("{:?}", val),
        }
        .build()
    })?))
}

pub struct RemoteSchemaProvider {
    catalog_name: String,
    schema_name: String,
    node_id: u64,
    backend: KvBackendRef,
    tables: Arc<ArcSwap<HashMap<String, TableRef>>>,
    mutex: Arc<Mutex<()>>,
}

impl RemoteSchemaProvider {
    pub fn new(
        catalog_name: String,
        schema_name: String,
        node_id: u64,
        backend: KvBackendRef,
    ) -> Self {
        Self {
            catalog_name,
            schema_name,
            node_id,
            backend,
            tables: Default::default(),
            mutex: Default::default(),
        }
    }

    fn build_regional_table_key(&self, table_name: impl AsRef<str>) -> TableRegionalKey {
        TableRegionalKey {
            catalog_name: self.catalog_name.clone(),
            schema_name: self.schema_name.clone(),
            table_name: table_name.as_ref().to_string(),
            node_id: self.node_id,
        }
    }
}

impl SchemaProvider for RemoteSchemaProvider {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn table_names(&self) -> Result<Vec<String>> {
        Ok(self.tables.load().keys().cloned().collect::<Vec<_>>())
    }

    fn table(&self, name: &str) -> Result<Option<TableRef>> {
        Ok(self.tables.load().get(name).cloned())
    }

    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
        let table_info = table.table_info();
        let table_version = table_info.ident.version;
        let table_value = TableRegionalValue {
            version: table_version,
            regions_ids: table.table_info().meta.region_numbers.clone(),
        };
        let backend = self.backend.clone();
        let mutex = self.mutex.clone();
        let tables = self.tables.clone();
        let table_key = self.build_regional_table_key(&name).to_string();

        let prev = std::thread::spawn(move || {
            common_runtime::block_on_read(async move {
                let _guard = mutex.lock().await;
                backend
                    .set(
                        table_key.as_bytes(),
                        &table_value.as_bytes().context(InvalidCatalogValueSnafu)?,
                    )
                    .await?;
                debug!(
                    "Successfully set catalog table entry, key: {}, table value: {:?}",
                    table_key, table_value
                );

                let prev_tables = tables.load();
                let mut new_tables = HashMap::with_capacity(prev_tables.len() + 1);
                new_tables.clone_from(&prev_tables);
                let prev = new_tables.insert(name, table);
                tables.store(Arc::new(new_tables));
                Ok(prev)
            })
        })
        .join()
        .unwrap();
        prev
    }

    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
        let table_name = name.to_string();
        let table_key = self.build_regional_table_key(&table_name).to_string();
        let backend = self.backend.clone();
        let mutex = self.mutex.clone();
        let tables = self.tables.clone();
        let prev = std::thread::spawn(move || {
            common_runtime::block_on_read(async move {
                let _guard = mutex.lock().await;
                backend.delete(table_key.as_bytes()).await?;
                debug!(
                    "Successfully deleted catalog table entry, key: {}",
                    table_key
                );

                let prev_tables = tables.load();
                let mut new_tables = HashMap::with_capacity(prev_tables.len() + 1);
                new_tables.clone_from(&prev_tables);
                let prev = new_tables.remove(&table_name);
                tables.store(Arc::new(new_tables));
                Ok(prev)
            })
        })
        .join()
        .unwrap();
        prev
    }

    /// Checks if a table exists in the schema provider, based on the locally opened table map.
    fn table_exist(&self, name: &str) -> Result<bool> {
        Ok(self.tables.load().contains_key(name))
    }
}
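
// The copy-on-write map update that `register_table`/`deregister_table` use,
// in isolation (assumes the `arc-swap` crate): readers keep lock-free access
// to the old snapshot while the writer clones it, mutates the clone, and
// atomically swaps the new snapshot in.
#[allow(dead_code)]
fn cow_insert_sketch(
    map: &arc_swap::ArcSwap<std::collections::HashMap<String, u32>>,
    key: String,
    value: u32,
) -> Option<u32> {
    let prev_map = map.load();
    let mut new_map = std::collections::HashMap::with_capacity(prev_map.len() + 1);
    new_map.clone_from(&prev_map); // full clone: writers pay, readers never block
    let prev = new_map.insert(key, value); // mutate the private copy
    map.store(std::sync::Arc::new(new_map)); // publish atomically
    prev
}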

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_table_id() {
        assert_eq!(12, parse_table_id(&12_i32.to_le_bytes()).unwrap());
        let mut data = vec![];
        data.extend_from_slice(&12_i32.to_le_bytes());
        data.push(0);
        assert!(parse_table_id(&data).is_err());
    }
}

@@ -12,10 +12,10 @@ pub trait SchemaProvider: Sync + Send {
     fn as_any(&self) -> &dyn Any;
 
     /// Retrieves the list of available table names in this schema.
-    fn table_names(&self) -> Vec<String>;
+    fn table_names(&self) -> Result<Vec<String>>;
 
     /// Retrieves a specific table from the schema by name, provided it exists.
-    fn table(&self, name: &str) -> Option<TableRef>;
+    fn table(&self, name: &str) -> Result<Option<TableRef>>;
 
     /// If supported by the implementation, adds a new table to this schema.
     /// If a table of the same name existed before, it returns "Table already exists" error.
@@ -28,7 +28,7 @@ pub trait SchemaProvider: Sync + Send {
     /// If supported by the implementation, checks the table exist in the schema provider or not.
     /// If no matched table in the schema provider, return false.
     /// Otherwise, return true.
-    fn table_exist(&self, name: &str) -> bool;
+    fn table_exist(&self, name: &str) -> Result<bool>;
 }
 
 pub type SchemaProviderRef = Arc<dyn SchemaProvider>;

@@ -2,7 +2,13 @@ use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
 
+use common_catalog::consts::{
+    INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
+    SYSTEM_CATALOG_TABLE_NAME,
+};
 use common_query::logical_plan::Expr;
+use common_query::physical_plan::PhysicalPlanRef;
+use common_query::physical_plan::RuntimeEnv;
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::debug;
 use common_time::timestamp::Timestamp;
@@ -13,16 +19,12 @@ use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, OptionExt, ResultExt};
 use table::engine::{EngineContext, TableEngineRef};
-use table::metadata::TableId;
+use table::metadata::{TableId, TableInfoRef};
 use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest};
 use table::{Table, TableRef};
 
-use crate::consts::{
-    INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
-    SYSTEM_CATALOG_TABLE_NAME,
-};
 use crate::error::{
-    CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
+    self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
     OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
 };
@@ -32,7 +34,7 @@ pub const TIMESTAMP_INDEX: usize = 2;
 pub const VALUE_INDEX: usize = 3;
 
 pub struct SystemCatalogTable {
-    schema: SchemaRef,
+    table_info: TableInfoRef,
     pub table: TableRef,
 }
 
@@ -43,7 +45,7 @@ impl Table for SystemCatalogTable {
     }
 
     fn schema(&self) -> SchemaRef {
-        self.schema.clone()
+        self.table_info.meta.schema.clone()
     }
 
     async fn scan(
@@ -51,7 +53,7 @@ impl Table for SystemCatalogTable {
         _projection: &Option<Vec<usize>>,
         _filters: &[Expr],
         _limit: Option<usize>,
-    ) -> table::Result<SendableRecordBatchStream> {
+    ) -> table::Result<PhysicalPlanRef> {
         panic!("System catalog table does not support scan!")
     }
 
@@ -59,6 +61,10 @@ impl Table for SystemCatalogTable {
     async fn insert(&self, request: InsertRequest) -> table::error::Result<usize> {
         self.table.insert(request).await
     }
+
+    fn table_info(&self) -> TableInfoRef {
+        self.table_info.clone()
+    }
 }
 
 impl SystemCatalogTable {
@@ -77,16 +83,20 @@ impl SystemCatalogTable {
             .await
             .context(OpenSystemCatalogSnafu)?
         {
-            Ok(Self { table, schema })
+            Ok(Self {
+                table_info: table.table_info(),
+                table,
+            })
         } else {
             // system catalog table is not yet created, try to create
            let request = CreateTableRequest {
                 id: SYSTEM_CATALOG_TABLE_ID,
-                catalog_name: Some(SYSTEM_CATALOG_NAME.to_string()),
-                schema_name: Some(INFORMATION_SCHEMA_NAME.to_string()),
+                catalog_name: SYSTEM_CATALOG_NAME.to_string(),
+                schema_name: INFORMATION_SCHEMA_NAME.to_string(),
                 table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
                 desc: Some("System catalog table".to_string()),
                 schema: schema.clone(),
+                region_numbers: vec![0],
                 primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
                 create_if_not_exists: true,
                 table_options: HashMap::new(),
@@ -96,14 +106,23 @@ impl SystemCatalogTable {
                 .create_table(&ctx, request)
                 .await
                 .context(CreateSystemCatalogSnafu)?;
-            Ok(Self { table, schema })
+            let table_info = table.table_info();
+            Ok(Self { table, table_info })
         }
     }
 
     /// Create a stream of all entries inside system catalog table
     pub async fn records(&self) -> Result<SendableRecordBatchStream> {
         let full_projection = None;
-        let stream = self.table.scan(&full_projection, &[], None).await.unwrap();
+        let scan = self
+            .table
+            .scan(&full_projection, &[], None)
+            .await
+            .context(error::SystemCatalogTableScanSnafu)?;
+        let stream = scan
+            .execute(0, Arc::new(RuntimeEnv::default()))
+            .await
+            .context(error::SystemCatalogTableScanExecSnafu)?;
         Ok(stream)
     }
 }
@@ -153,7 +172,7 @@ fn build_system_catalog_schema() -> Schema {
     // The schema of this table must be valid.
     SchemaBuilder::try_from(cols)
         .unwrap()
-        .timestamp_index(2)
+        .timestamp_index(Some(2))
         .build()
         .unwrap()
 }
@@ -320,6 +339,16 @@ pub struct TableEntryValue {
 
 #[cfg(test)]
 mod tests {
+    use log_store::fs::noop::NoopLogStore;
+    use object_store::ObjectStore;
+    use storage::config::EngineConfig as StorageEngineConfig;
+    use storage::EngineImpl;
+    use table::metadata::TableType;
+    use table::metadata::TableType::Base;
+    use table_engine::config::EngineConfig;
+    use table_engine::engine::MitoEngine;
+    use tempdir::TempDir;
+
     use super::*;
 
     #[test]
@@ -391,4 +420,43 @@ mod tests {
         assert_eq!(EntryType::Table, EntryType::try_from(3).unwrap());
         assert!(EntryType::try_from(4).is_err());
     }
+
+    pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
+        let dir = TempDir::new("system-table-test").unwrap();
+        let store_dir = dir.path().to_string_lossy();
+        let accessor = opendal::services::fs::Builder::default()
+            .root(&store_dir)
+            .build()
+            .unwrap();
+        let object_store = ObjectStore::new(accessor);
+        let table_engine = Arc::new(MitoEngine::new(
+            EngineConfig::default(),
+            EngineImpl::new(
+                StorageEngineConfig::default(),
+                Arc::new(NoopLogStore::default()),
+                object_store.clone(),
+            ),
+            object_store,
+        ));
+        (dir, table_engine)
+    }
+
+    #[tokio::test]
+    async fn test_system_table_type() {
+        let (_dir, table_engine) = prepare_table_engine().await;
+        let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
+        assert_eq!(Base, system_table.table_type());
+    }
+
+    #[tokio::test]
+    async fn test_system_table_info() {
+        let (_dir, table_engine) = prepare_table_engine().await;
+        let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
+        let info = system_table.table_info();
+        assert_eq!(TableType::Base, info.table_type);
+        assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name);
+        assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id);
+        assert_eq!(SYSTEM_CATALOG_NAME, info.catalog_name);
+        assert_eq!(INFORMATION_SCHEMA_NAME, info.schema_name);
+    }
 }
@@ -6,9 +6,12 @@ use std::sync::Arc;
 use std::task::{Context, Poll};
 
 use async_stream::stream;
+use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
+use common_error::ext::BoxedError;
 use common_query::logical_plan::Expr;
+use common_query::physical_plan::PhysicalPlanRef;
 use common_recordbatch::error::Result as RecordBatchResult;
-use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
+use common_recordbatch::{RecordBatch, RecordBatchStream};
 use datatypes::prelude::{ConcreteDataType, VectorBuilder};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
 use datatypes::value::Value;
@@ -16,11 +19,12 @@ use datatypes::vectors::VectorRef;
 use futures::Stream;
 use snafu::ResultExt;
 use table::engine::TableEngineRef;
-use table::metadata::TableId;
+use table::error::TablesRecordBatchSnafu;
+use table::metadata::{TableId, TableInfoRef};
+use table::table::scan::SimpleTableScan;
 use table::{Table, TableRef};
 
-use crate::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
-use crate::error::InsertTableRecordSnafu;
+use crate::error::{Error, InsertTableRecordSnafu};
 use crate::system::{build_table_insert_request, SystemCatalogTable};
 use crate::{
     format_full_table_name, CatalogListRef, CatalogProvider, SchemaProvider, SchemaProviderRef,
@@ -53,23 +57,53 @@ impl Table for Tables {
         self.schema.clone()
     }
 
+    fn table_info(&self) -> TableInfoRef {
+        unreachable!("Tables does not support table_info method")
+    }
+
     async fn scan(
         &self,
         _projection: &Option<Vec<usize>>,
         _filters: &[Expr],
         _limit: Option<usize>,
-    ) -> table::error::Result<SendableRecordBatchStream> {
+    ) -> table::error::Result<PhysicalPlanRef> {
         let catalogs = self.catalogs.clone();
         let schema_ref = self.schema.clone();
         let engine_name = self.engine_name.clone();
 
         let stream = stream!({
-            for catalog_name in catalogs.catalog_names() {
-                let catalog = catalogs.catalog(&catalog_name).unwrap();
-                for schema_name in catalog.schema_names() {
-                    let mut tables_in_schema = Vec::with_capacity(catalog.schema_names().len());
-                    let schema = catalog.schema(&schema_name).unwrap();
-                    for table_name in schema.table_names() {
+            for catalog_name in catalogs
+                .catalog_names()
+                .map_err(BoxedError::new)
+                .context(TablesRecordBatchSnafu)?
+            {
+                let catalog = catalogs
+                    .catalog(&catalog_name)
+                    .map_err(BoxedError::new)
+                    .context(TablesRecordBatchSnafu)?
+                    .unwrap();
+                for schema_name in catalog
+                    .schema_names()
+                    .map_err(BoxedError::new)
+                    .context(TablesRecordBatchSnafu)?
+                {
+                    let mut tables_in_schema = Vec::with_capacity(
+                        catalog
+                            .schema_names()
+                            .map_err(BoxedError::new)
+                            .context(TablesRecordBatchSnafu)?
+                            .len(),
+                    );
+                    let schema = catalog
+                        .schema(&schema_name)
+                        .map_err(BoxedError::new)
+                        .context(TablesRecordBatchSnafu)?
+                        .unwrap();
+                    for table_name in schema
+                        .table_names()
+                        .map_err(BoxedError::new)
+                        .context(TablesRecordBatchSnafu)?
+                    {
                         tables_in_schema.push(table_name);
                     }
 
@@ -85,10 +119,11 @@ impl Table for Tables {
             }
         });
 
-        Ok(Box::pin(TablesRecordBatchStream {
+        let stream = Box::pin(TablesRecordBatchStream {
             schema: self.schema.clone(),
             stream: Box::pin(stream),
-        }))
+        });
+        Ok(Arc::new(SimpleTableScan::new(stream)))
     }
 }
@@ -152,17 +187,20 @@ impl SchemaProvider for InformationSchema {
         self
     }
 
-    fn table_names(&self) -> Vec<String> {
-        vec!["tables".to_string(), SYSTEM_CATALOG_TABLE_NAME.to_string()]
+    fn table_names(&self) -> Result<Vec<String>, Error> {
+        Ok(vec![
+            "tables".to_string(),
+            SYSTEM_CATALOG_TABLE_NAME.to_string(),
+        ])
     }
 
-    fn table(&self, name: &str) -> Option<TableRef> {
+    fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
         if name.eq_ignore_ascii_case("tables") {
-            Some(self.tables.clone())
+            Ok(Some(self.tables.clone()))
         } else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
-            Some(self.system.clone())
+            Ok(Some(self.system.clone()))
         } else {
-            None
+            Ok(None)
         }
     }
 
@@ -178,8 +216,9 @@ impl SchemaProvider for InformationSchema {
         panic!("System catalog & schema does not support deregister table")
     }
 
-    fn table_exist(&self, name: &str) -> bool {
-        name.eq_ignore_ascii_case("tables") || name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME)
+    fn table_exist(&self, name: &str) -> Result<bool, Error> {
+        Ok(name.eq_ignore_ascii_case("tables")
+            || name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
     }
 }
@@ -224,23 +263,23 @@ impl CatalogProvider for SystemCatalog {
         self
     }
 
-    fn schema_names(&self) -> Vec<String> {
-        vec![INFORMATION_SCHEMA_NAME.to_string()]
+    fn schema_names(&self) -> Result<Vec<String>, Error> {
+        Ok(vec![INFORMATION_SCHEMA_NAME.to_string()])
     }
 
     fn register_schema(
         &self,
         _name: String,
         _schema: SchemaProviderRef,
-    ) -> Option<SchemaProviderRef> {
+    ) -> Result<Option<SchemaProviderRef>, Error> {
        panic!("System catalog does not support registering schema!")
     }
 
-    fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
+    fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>, Error> {
         if name.eq_ignore_ascii_case(INFORMATION_SCHEMA_NAME) {
-            Some(self.information_schema.clone())
+            Ok(Some(self.information_schema.clone()))
         } else {
-            None
+            Ok(None)
         }
     }
 }
@@ -273,13 +312,16 @@ fn build_schema_for_tables() -> Schema {
 
 #[cfg(test)]
 mod tests {
+    use common_query::physical_plan::RuntimeEnv;
     use datatypes::arrow::array::Utf8Array;
     use datatypes::arrow::datatypes::DataType;
     use futures_util::StreamExt;
     use table::table::numbers::NumbersTable;
 
     use super::*;
-    use crate::memory::{new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider};
+    use crate::local::memory::{
+        new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider,
+    };
     use crate::CatalogList;
 
     #[tokio::test]
@@ -290,11 +332,19 @@ mod tests {
         schema
             .register_table("test_table".to_string(), Arc::new(NumbersTable::default()))
             .unwrap();
-        catalog_provider.register_schema("test_schema".to_string(), schema);
-        catalog_list.register_catalog("test_catalog".to_string(), catalog_provider);
+        catalog_provider
+            .register_schema("test_schema".to_string(), schema)
+            .unwrap();
+        catalog_list
+            .register_catalog("test_catalog".to_string(), catalog_provider)
+            .unwrap();
         let tables = Tables::new(catalog_list, "test_engine".to_string());
 
-        let mut tables_stream = tables.scan(&None, &[], None).await.unwrap();
+        let tables_stream = tables.scan(&None, &[], None).await.unwrap();
+        let mut tables_stream = tables_stream
+            .execute(0, Arc::new(RuntimeEnv::default()))
+            .await
+            .unwrap();
         if let Some(t) = tables_stream.next().await {
             let batch = t.unwrap().df_recordbatch;
             assert_eq!(1, batch.num_rows());
src/catalog/tests/mock.rs (new file, 192 lines)
@@ -0,0 +1,192 @@
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use std::sync::Arc;

use async_stream::stream;
use catalog::error::Error;
use catalog::remote::{Kv, KvBackend, ValueIter};
use common_recordbatch::RecordBatch;
use common_telemetry::logging::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use serde::Serializer;
use table::engine::{EngineContext, TableEngine};
use table::metadata::TableId;
use table::requests::{AlterTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest};
use table::test_util::MemTable;
use table::TableRef;
use tokio::sync::RwLock;

#[derive(Default)]
pub struct MockKvBackend {
    map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
}

impl Display for MockKvBackend {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        futures::executor::block_on(async {
            let map = self.map.read().await;
            for (k, v) in map.iter() {
                f.serialize_str(&String::from_utf8_lossy(k))?;
                f.serialize_str(" -> ")?;
                f.serialize_str(&String::from_utf8_lossy(v))?;
                f.serialize_str("\n")?;
            }
            Ok(())
        })
    }
}

#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
    where
        'a: 'b,
    {
        let prefix = key.to_vec();
        let prefix_string = String::from_utf8_lossy(&prefix).to_string();
        Box::pin(stream!({
            let maps = self.map.read().await.clone();
            for (k, v) in maps.range(prefix.clone()..) {
                let key_string = String::from_utf8_lossy(k).to_string();
                let matches = key_string.starts_with(&prefix_string);
                if matches {
                    yield Ok(Kv(k.clone(), v.clone()))
                } else {
                    info!("Stream finished");
                    return;
                }
            }
        }))
    }

    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
        let mut map = self.map.write().await;
        map.insert(key.to_vec(), val.to_vec());
        Ok(())
    }

    async fn compare_and_set(
        &self,
        key: &[u8],
        expect: &[u8],
        val: &[u8],
    ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
        let mut map = self.map.write().await;
        let existing = map.entry(key.to_vec());
        match existing {
            Entry::Vacant(e) => {
                if expect.is_empty() {
                    e.insert(val.to_vec());
                    Ok(Ok(()))
                } else {
                    Ok(Err(None))
                }
            }
            Entry::Occupied(mut existing) => {
                if existing.get() == expect {
                    existing.insert(val.to_vec());
                    Ok(Ok(()))
                } else {
                    Ok(Err(Some(existing.get().clone())))
                }
            }
        }
    }

    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
        let start = key.to_vec();
        let end = end.to_vec();
        let range = start..end;

        let mut map = self.map.write().await;
        map.retain(|k, _| !range.contains(k));
        Ok(())
    }
}
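
// Why `range` above may stop at the first non-matching key: a BTreeMap
// iterates in ascending key order, so once a key no longer starts with the
// prefix, no later key can. The same prefix scan in isolation, as a sketch:
#[allow(dead_code)]
fn prefix_scan<'a>(
    map: &'a BTreeMap<Vec<u8>, Vec<u8>>,
    prefix: &'a [u8],
) -> impl Iterator<Item = (&'a Vec<u8>, &'a Vec<u8>)> + 'a {
    map.range(prefix.to_vec()..) // seek to the first key >= prefix
        .take_while(move |(k, _)| k.starts_with(prefix)) // stop at the first miss
}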

#[derive(Default)]
pub struct MockTableEngine {
    tables: RwLock<HashMap<String, TableRef>>,
}

#[async_trait::async_trait]
impl TableEngine for MockTableEngine {
    fn name(&self) -> &str {
        "MockTableEngine"
    }

    /// Create a table with only one column
    async fn create_table(
        &self,
        _ctx: &EngineContext,
        request: CreateTableRequest,
    ) -> table::Result<TableRef> {
        let table_name = request.table_name.clone();
        let catalog_name = request.catalog_name.clone();
        let schema_name = request.schema_name.clone();

        let default_table_id = "0".to_owned();
        let table_id = TableId::from_str(
            request
                .table_options
                .get("table_id")
                .unwrap_or(&default_table_id),
        )
        .unwrap();
        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
            "name",
            ConcreteDataType::string_datatype(),
            true,
        )]));

        let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
        let record_batch = RecordBatch::new(schema, data).unwrap();
        let table: TableRef = Arc::new(MemTable::new_with_catalog(
            &table_name,
            record_batch,
            table_id,
            catalog_name,
            schema_name,
        )) as Arc<_>;

        let mut tables = self.tables.write().await;
        tables.insert(table_name, table.clone() as TableRef);
        Ok(table)
    }

    async fn open_table(
        &self,
        _ctx: &EngineContext,
        request: OpenTableRequest,
    ) -> table::Result<Option<TableRef>> {
        Ok(self.tables.read().await.get(&request.table_name).cloned())
    }

    async fn alter_table(
        &self,
        _ctx: &EngineContext,
        _request: AlterTableRequest,
    ) -> table::Result<TableRef> {
        unimplemented!()
    }

    fn get_table(&self, _ctx: &EngineContext, name: &str) -> table::Result<Option<TableRef>> {
        futures::executor::block_on(async { Ok(self.tables.read().await.get(name).cloned()) })
    }

    fn table_exists(&self, _ctx: &EngineContext, name: &str) -> bool {
        futures::executor::block_on(async { self.tables.read().await.contains_key(name) })
    }

    async fn drop_table(
        &self,
        _ctx: &EngineContext,
        _request: DropTableRequest,
    ) -> table::Result<()> {
        unimplemented!()
    }
}
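
// A small usage sketch of the CAS semantics implemented by `MockKvBackend`
// above (the test name is illustrative): an empty `expect` claims a vacant
// key, while a stale `expect` is rejected and hands back the current value.
#[tokio::test]
async fn test_cas_semantics_sketch() {
    let backend = MockKvBackend::default();
    // vacant key + empty expectation: the set wins
    assert!(backend
        .compare_and_set(b"k", b"", b"v1")
        .await
        .unwrap()
        .is_ok());
    // stale expectation: rejected, and the current value is returned
    let res = backend.compare_and_set(b"k", b"old", b"v2").await.unwrap();
    assert_eq!(Some(b"v1".to_vec()), res.unwrap_err());
}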

src/catalog/tests/remote_catalog_tests.rs (new file, 295 lines)
@@ -0,0 +1,295 @@
#![feature(assert_matches)]

mod mock;

#[cfg(test)]
mod tests {
    use std::assert_matches::assert_matches;
    use std::collections::HashSet;
    use std::sync::Arc;

    use catalog::remote::{
        KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
    };
    use catalog::{CatalogManager, CatalogManagerRef, RegisterTableRequest};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
    use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
    use datatypes::schema::Schema;
    use futures_util::StreamExt;
    use table::engine::{EngineContext, TableEngineRef};
    use table::requests::CreateTableRequest;

    use crate::mock::{MockKvBackend, MockTableEngine};

    #[tokio::test]
    async fn test_backend() {
        common_telemetry::init_default_ut_logging();
        let backend = MockKvBackend::default();

        let default_catalog_key = CatalogKey {
            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
        }
        .to_string();

        backend
            .set(
                default_catalog_key.as_bytes(),
                &CatalogValue {}.as_bytes().unwrap(),
            )
            .await
            .unwrap();

        let schema_key = SchemaKey {
            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
        }
        .to_string();
        backend
            .set(schema_key.as_bytes(), &SchemaValue {}.as_bytes().unwrap())
            .await
            .unwrap();

        let mut iter = backend.range("__c-".as_bytes());
        let mut res = HashSet::new();
        while let Some(r) = iter.next().await {
            let kv = r.unwrap();
            res.insert(String::from_utf8_lossy(&kv.0).to_string());
        }
        assert_eq!(
            vec!["__c-greptime".to_string()],
            res.into_iter().collect::<Vec<_>>()
        );
    }

    async fn prepare_components(node_id: u64) -> (KvBackendRef, TableEngineRef, CatalogManagerRef) {
        let backend = Arc::new(MockKvBackend::default()) as KvBackendRef;
        let table_engine = Arc::new(MockTableEngine::default());
        let catalog_manager =
            RemoteCatalogManager::new(table_engine.clone(), node_id, backend.clone());
        catalog_manager.start().await.unwrap();
        (backend, table_engine, Arc::new(catalog_manager))
    }

    #[tokio::test]
    async fn test_remote_catalog_default() {
        common_telemetry::init_default_ut_logging();
        let node_id = 42;
        let (_, _, catalog_manager) = prepare_components(node_id).await;
        assert_eq!(
            vec![DEFAULT_CATALOG_NAME.to_string()],
            catalog_manager.catalog_names().unwrap()
        );

        let default_catalog = catalog_manager
            .catalog(DEFAULT_CATALOG_NAME)
            .unwrap()
            .unwrap();
        assert_eq!(
            vec![DEFAULT_SCHEMA_NAME.to_string()],
            default_catalog.schema_names().unwrap()
        );
    }

    #[tokio::test]
    async fn test_remote_catalog_register_nonexistent() {
        common_telemetry::init_default_ut_logging();
        let node_id = 42;
        let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
        // register a new table with a nonexistent catalog
        let catalog_name = "nonexistent_catalog".to_string();
        let schema_name = "nonexistent_schema".to_string();
        let table_name = "fail_table".to_string();
        // this schema has no effect
        let table_schema = Arc::new(Schema::new(vec![]));
        let table = table_engine
            .create_table(
                &EngineContext {},
                CreateTableRequest {
                    id: 1,
                    catalog_name: catalog_name.clone(),
                    schema_name: schema_name.clone(),
                    table_name: table_name.clone(),
                    desc: None,
                    schema: table_schema.clone(),
                    region_numbers: vec![0],
                    primary_key_indices: vec![],
                    create_if_not_exists: false,
                    table_options: Default::default(),
                },
            )
            .await
            .unwrap();
        let reg_req = RegisterTableRequest {
            catalog: catalog_name,
            schema: schema_name,
            table_name,
            table_id: 1,
            table,
        };
        let res = catalog_manager.register_table(reg_req).await;

        // fails because nonexistent_catalog does not exist yet.
        assert_matches!(
            res.err().unwrap(),
            catalog::error::Error::CatalogNotFound { .. }
        );
    }

    #[tokio::test]
    async fn test_register_table() {
        let node_id = 42;
        let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
        let default_catalog = catalog_manager
            .catalog(DEFAULT_CATALOG_NAME)
            .unwrap()
            .unwrap();
        assert_eq!(
            vec![DEFAULT_SCHEMA_NAME.to_string()],
            default_catalog.schema_names().unwrap()
        );

        let default_schema = default_catalog
            .schema(DEFAULT_SCHEMA_NAME)
            .unwrap()
            .unwrap();
        assert_eq!(vec!["numbers"], default_schema.table_names().unwrap());

        // register a new table in the default catalog and schema
        let catalog_name = DEFAULT_CATALOG_NAME.to_string();
        let schema_name = DEFAULT_SCHEMA_NAME.to_string();
        let table_name = "test_table".to_string();
        let table_id = 1;
        // this schema has no effect
        let table_schema = Arc::new(Schema::new(vec![]));
        let table = table_engine
            .create_table(
                &EngineContext {},
                CreateTableRequest {
                    id: table_id,
                    catalog_name: catalog_name.clone(),
                    schema_name: schema_name.clone(),
                    table_name: table_name.clone(),
                    desc: None,
                    schema: table_schema.clone(),
                    region_numbers: vec![0],
                    primary_key_indices: vec![],
                    create_if_not_exists: false,
                    table_options: Default::default(),
                },
            )
            .await
            .unwrap();
        let reg_req = RegisterTableRequest {
            catalog: catalog_name,
            schema: schema_name,
            table_name: table_name.clone(),
            table_id,
            table,
        };
        assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
        assert_eq!(
            HashSet::from([table_name, "numbers".to_string()]),
            default_schema
                .table_names()
                .unwrap()
                .into_iter()
                .collect::<HashSet<_>>()
        );
    }

    #[tokio::test]
    async fn test_register_catalog_schema_table() {
        let node_id = 42;
        let (backend, table_engine, catalog_manager) = prepare_components(node_id).await;

        let catalog_name = "test_catalog".to_string();
        let schema_name = "nonexistent_schema".to_string();
        let catalog = Arc::new(RemoteCatalogProvider::new(
            catalog_name.clone(),
            backend.clone(),
        ));

        // register the catalog with the catalog manager
        catalog_manager
            .register_catalog(catalog_name.clone(), catalog)
            .unwrap();
        assert_eq!(
            HashSet::<String>::from_iter(
                vec![DEFAULT_CATALOG_NAME.to_string(), catalog_name.clone()].into_iter()
            ),
            HashSet::from_iter(catalog_manager.catalog_names().unwrap().into_iter())
        );

        let table_to_register = table_engine
            .create_table(
                &EngineContext {},
                CreateTableRequest {
                    id: 2,
                    catalog_name: catalog_name.clone(),
                    schema_name: schema_name.clone(),
                    table_name: "".to_string(),
                    desc: None,
                    schema: Arc::new(Schema::new(vec![])),
                    region_numbers: vec![0],
                    primary_key_indices: vec![],
                    create_if_not_exists: false,
                    table_options: Default::default(),
                },
            )
            .await
            .unwrap();

        let reg_req = RegisterTableRequest {
            catalog: catalog_name.clone(),
            schema: schema_name.clone(),
            table_name: " fail_table".to_string(),
            table_id: 2,
            table: table_to_register,
        };
        // this register will fail since the schema does not exist yet
        assert_matches!(
            catalog_manager
                .register_table(reg_req.clone())
                .await
                .unwrap_err(),
            catalog::error::Error::SchemaNotFound { .. }
        );

        let new_catalog = catalog_manager
            .catalog(&catalog_name)
            .unwrap()
            .expect("catalog should exist since it's already registered");
        let schema = Arc::new(RemoteSchemaProvider::new(
            catalog_name.clone(),
            schema_name.clone(),
            node_id,
            backend.clone(),
        ));

        let prev = new_catalog
            .register_schema(schema_name.clone(), schema.clone())
            .expect("Register schema should not fail");
        assert!(prev.is_none());
        assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());

        assert_eq!(
            HashSet::from([schema_name.clone()]),
            new_catalog.schema_names().unwrap().into_iter().collect()
        )
    }

    #[tokio::test]
    async fn test_next_table_id() {
        let node_id = 42;
        let (_, _, catalog_manager) = prepare_components(node_id).await;
        assert_eq!(
            MIN_USER_TABLE_ID,
            catalog_manager.next_table_id().await.unwrap()
        );

        assert_eq!(
            MIN_USER_TABLE_ID + 1,
            catalog_manager.next_table_id().await.unwrap()
        );
    }
}
@@ -7,20 +7,34 @@ edition = "2021"
 [dependencies]
 api = { path = "../api" }
 async-stream = "0.3"
+catalog = { path = "../catalog" }
 common-base = { path = "../common/base" }
 common-error = { path = "../common/error" }
+common-grpc = { path = "../common/grpc" }
 common-query = { path = "../common/query" }
 common-recordbatch = { path = "../common/recordbatch" }
 common-time = { path = "../common/time" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
+datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
+    "simd",
+] }
 datatypes = { path = "../datatypes" }
 enum_dispatch = "0.3"
+parking_lot = "0.12"
 rand = "0.8"
 snafu = { version = "0.7", features = ["backtraces"] }
 tonic = "0.8"
 
 [dev-dependencies]
 datanode = { path = "../datanode" }
+substrait = { path = "../common/substrait" }
 tokio = { version = "1.0", features = ["full"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+
+# TODO(ruihang): upgrade to 0.11 once substrait-rs supports it.
+[dev-dependencies.prost_09]
+package = "prost"
+version = "0.9"
+
+[dev-dependencies.substrait_proto]
+package = "substrait"
+version = "0.2"
@@ -1,3 +1,5 @@
+use std::collections::HashMap;
+
 use api::v1::{codec::InsertBatch, *};
 use client::{Client, Database};
 
@@ -10,7 +12,7 @@ fn main() {
 
 #[tokio::main]
 async fn run() {
-    let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
+    let client = Client::with_urls(vec!["127.0.0.1:3001"]);
     let db = Database::new("greptime", client);
 
     let expr = InsertExpr {
@@ -18,6 +20,7 @@ async fn run() {
         expr: Some(insert_expr::Expr::Values(insert_expr::Values {
             values: insert_batches(),
         })),
+        options: HashMap::default(),
     };
     db.insert(expr).await.unwrap();
 }
src/client/examples/logical.rs (new file, 96 lines)
@@ -0,0 +1,96 @@
use api::v1::{ColumnDataType, ColumnDef, CreateExpr};
use client::{admin::Admin, Client, Database};
use prost_09::Message;
use substrait_proto::protobuf::{
    plan_rel::RelType as PlanRelType,
    read_rel::{NamedTable, ReadType},
    rel::RelType,
    PlanRel, ReadRel, Rel,
};
use tracing::{event, Level};

fn main() {
    tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
        .unwrap();

    run();
}

#[tokio::main]
async fn run() {
    let client = Client::with_urls(vec!["127.0.0.1:3001"]);

    let create_table_expr = CreateExpr {
        catalog_name: Some("greptime".to_string()),
        schema_name: Some("public".to_string()),
        table_name: "test_logical_dist_exec".to_string(),
        desc: None,
        column_defs: vec![
            ColumnDef {
                name: "timestamp".to_string(),
                datatype: ColumnDataType::Timestamp as i32,
                is_nullable: false,
                default_constraint: None,
            },
            ColumnDef {
                name: "key".to_string(),
                datatype: ColumnDataType::Uint64 as i32,
                is_nullable: false,
                default_constraint: None,
            },
            ColumnDef {
                name: "value".to_string(),
                datatype: ColumnDataType::Uint64 as i32,
                is_nullable: false,
                default_constraint: None,
            },
        ],
        time_index: "timestamp".to_string(),
        primary_keys: vec!["key".to_string()],
        create_if_not_exists: false,
        table_options: Default::default(),
    };

    let admin = Admin::new("create table", client.clone());
    let result = admin.create(create_table_expr).await.unwrap();
    event!(Level::INFO, "create table result: {:#?}", result);

    let logical = mock_logical_plan();
    event!(Level::INFO, "plan size: {:#?}", logical.len());
    let db = Database::new("greptime", client);
    let result = db.logical_plan(logical).await.unwrap();

    event!(Level::INFO, "result: {:#?}", result);
}

fn mock_logical_plan() -> Vec<u8> {
    let catalog_name = "greptime".to_string();
    let schema_name = "public".to_string();
    let table_name = "test_logical_dist_exec".to_string();

    let named_table = NamedTable {
        names: vec![catalog_name, schema_name, table_name],
        advanced_extension: None,
    };
    let read_type = ReadType::NamedTable(named_table);

    let read_rel = ReadRel {
        common: None,
        base_schema: None,
        filter: None,
        projection: None,
        advanced_extension: None,
        read_type: Some(read_type),
    };

    let mut buf = vec![];
    let rel = Rel {
        rel_type: Some(RelType::Read(Box::new(read_rel))),
    };
    let plan_rel = PlanRel {
        rel_type: Some(PlanRelType::Rel(rel)),
    };
    plan_rel.encode(&mut buf).unwrap();

    buf
}
@@ -16,7 +16,7 @@ fn main() {
 
 #[tokio::main]
 async fn run() {
-    let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
+    let client = Client::with_urls(vec!["127.0.0.1:3001"]);
     let db = Database::new("greptime", client);
 
     let physical = mock_physical_plan();
 
@@ -10,7 +10,7 @@ fn main() {
 
 #[tokio::main]
 async fn run() {
-    let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
+    let client = Client::with_urls(vec!["127.0.0.1:3001"]);
     let db = Database::new("greptime", client);
 
     let sql = Select::Sql("select * from demo".to_string());
 
@@ -22,10 +22,6 @@ impl Admin {
         }
     }
 
-    pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
-        self.client.start(url).await
-    }
-
     pub async fn create(&self, expr: CreateExpr) -> Result<AdminResult> {
         let header = ExprHeader {
             version: PROTOCOL_VERSION,
@@ -1,47 +1,96 @@
-use api::v1::{greptime_client::GreptimeClient, *};
-use snafu::{OptionExt, ResultExt};
+use std::sync::Arc;
+
+use api::v1::greptime_client::GreptimeClient;
+use api::v1::*;
+use common_grpc::channel_manager::ChannelManager;
+use parking_lot::RwLock;
+use snafu::OptionExt;
+use snafu::ResultExt;
 use tonic::transport::Channel;
 
 use crate::error;
+use crate::load_balance::LoadBalance;
+use crate::load_balance::Loadbalancer;
 use crate::Result;
 
 #[derive(Clone, Debug, Default)]
 pub struct Client {
-    client: Option<GreptimeClient<Channel>>,
+    inner: Arc<Inner>,
+}
+
+#[derive(Debug, Default)]
+struct Inner {
+    channel_manager: ChannelManager,
+    peers: Arc<RwLock<Vec<String>>>,
+    load_balance: Loadbalancer,
+}
+
+impl Inner {
+    fn with_manager(channel_manager: ChannelManager) -> Self {
+        Self {
+            channel_manager,
+            ..Default::default()
+        }
+    }
+
+    fn set_peers(&self, peers: Vec<String>) {
+        let mut guard = self.peers.write();
+        *guard = peers;
+    }
+
+    fn get_peer(&self) -> Option<String> {
+        let guard = self.peers.read();
+        self.load_balance.get_peer(&guard).cloned()
+    }
 }
 
 impl Client {
-    pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
-        match self.client.as_ref() {
-            None => {
-                let url = url.into();
-                let client = GreptimeClient::connect(url.clone())
-                    .await
-                    .context(error::ConnectFailedSnafu { url })?;
-                self.client = Some(client);
-                Ok(())
-            }
-            Some(_) => error::IllegalGrpcClientStateSnafu {
-                err_msg: "already started",
-            }
-            .fail(),
-        }
-    }
-
     pub fn new() -> Self {
         Default::default()
     }
 
-    pub fn with_client(client: GreptimeClient<Channel>) -> Self {
-        Self {
-            client: Some(client),
-        }
-    }
+    pub fn with_manager(channel_manager: ChannelManager) -> Self {
+        let inner = Arc::new(Inner::with_manager(channel_manager));
+        Self { inner }
+    }
+
+    pub fn with_urls<U, A>(urls: A) -> Self
+    where
+        U: AsRef<str>,
+        A: AsRef<[U]>,
+    {
+        Self::with_manager_and_urls(ChannelManager::new(), urls)
+    }
+
+    pub fn with_manager_and_urls<U, A>(channel_manager: ChannelManager, urls: A) -> Self
+    where
+        U: AsRef<str>,
+        A: AsRef<[U]>,
+    {
+        let inner = Inner::with_manager(channel_manager);
+        let urls: Vec<String> = urls
+            .as_ref()
+            .iter()
+            .map(|peer| peer.as_ref().to_string())
+            .collect();
+        inner.set_peers(urls);
+        Self {
+            inner: Arc::new(inner),
+        }
+    }
 
-    pub async fn connect(url: impl Into<String>) -> Result<Self> {
-        let url = url.into();
-        let client = GreptimeClient::connect(url.clone())
-            .await
-            .context(error::ConnectFailedSnafu { url })?;
-        Ok(Self {
-            client: Some(client),
-        })
-    }
+    pub fn start<U, A>(&self, urls: A)
+    where
+        U: AsRef<str>,
+        A: AsRef<[U]>,
+    {
+        let urls: Vec<String> = urls
+            .as_ref()
+            .iter()
+            .map(|peer| peer.as_ref().to_string())
+            .collect();
+
+        self.inner.set_peers(urls);
+    }
 
     pub async fn admin(&self, req: AdminRequest) -> Result<AdminResponse> {
@@ -73,18 +122,59 @@ impl Client {
     }
 
     pub async fn batch(&self, req: BatchRequest) -> Result<BatchResponse> {
-        if let Some(client) = self.client.as_ref() {
-            let res = client
-                .clone()
-                .batch(req)
-                .await
-                .context(error::TonicStatusSnafu)?;
-            Ok(res.into_inner())
-        } else {
-            error::IllegalGrpcClientStateSnafu {
-                err_msg: "not started",
-            }
-            .fail()
-        }
+        let peer = self
+            .inner
+            .get_peer()
+            .context(error::IllegalGrpcClientStateSnafu {
+                err_msg: "No available peer found",
+            })?;
+        let mut client = self.make_client(peer)?;
+        let result = client.batch(req).await.context(error::TonicStatusSnafu)?;
+        Ok(result.into_inner())
+    }
+
+    fn make_client(&self, addr: impl AsRef<str>) -> Result<GreptimeClient<Channel>> {
+        let addr = addr.as_ref();
+        let channel = self
+            .inner
+            .channel_manager
+            .get(addr)
+            .context(error::CreateChannelSnafu { addr })?;
+        Ok(GreptimeClient::new(channel))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    use super::Inner;
+    use crate::load_balance::Loadbalancer;
+
+    fn mock_peers() -> Vec<String> {
+        vec![
+            "127.0.0.1:3001".to_string(),
+            "127.0.0.1:3002".to_string(),
+            "127.0.0.1:3003".to_string(),
+        ]
+    }
+
+    #[test]
+    fn test_inner() {
+        let inner = Inner::default();
+
+        assert!(matches!(
+            inner.load_balance,
+            Loadbalancer::Random(crate::load_balance::Random)
+        ));
+        assert!(inner.get_peer().is_none());
+
+        let peers = mock_peers();
+        inner.set_peers(peers.clone());
+        let all: HashSet<String> = peers.into_iter().collect();
+
+        for _ in 0..20 {
+            assert!(all.contains(&inner.get_peer().unwrap()));
+        }
+    }
+}
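The rewritten client no longer holds one connected GreptimeClient; it keeps a peer list plus a ChannelManager and resolves a peer per request. A minimal usage sketch — illustrative only; the addresses are placeholders and the `demo` table assumes a running datanode:

async fn demo_query() -> Result<()> {
    // Channels are created lazily per peer by the ChannelManager,
    // so construction itself does not touch the network.
    let client = Client::with_urls(vec!["127.0.0.1:3001", "127.0.0.1:3002"]);

    // Each request first resolves a peer through the load balancer.
    let db = Database::new("greptime", client);
    let _result = db
        .select(Select::Sql("select * from demo".to_string()))
        .await?;
    Ok(())
}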
@@ -23,10 +23,7 @@ use snafu::{ensure, OptionExt, ResultExt};
 
 use crate::error;
 use crate::{
-    error::{
-        ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
-        MissingFieldSnafu,
-    },
+    error::{ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu},
     Client, Result,
 };
@@ -46,10 +43,6 @@ impl Database {
         }
     }
 
-    pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
-        self.client.start(url).await
-    }
-
     pub fn name(&self) -> &str {
         &self.name
     }
@@ -65,6 +58,24 @@ impl Database {
         self.object(expr).await?.try_into()
     }
 
+    pub async fn batch_insert(&self, insert_exprs: Vec<InsertExpr>) -> Result<Vec<ObjectResult>> {
+        let header = ExprHeader {
+            version: PROTOCOL_VERSION,
+        };
+        let obj_exprs = insert_exprs
+            .into_iter()
+            .map(|expr| ObjectExpr {
+                header: Some(header.clone()),
+                expr: Some(object_expr::Expr::Insert(expr)),
+            })
+            .collect();
+        self.objects(obj_exprs)
+            .await?
+            .into_iter()
+            .map(|result| result.try_into())
+            .collect()
+    }
+
     pub async fn select(&self, expr: Select) -> Result<ObjectResult> {
         let select_expr = match expr {
             Select::Sql(sql) => SelectExpr {
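batch_insert wraps each InsertExpr in an ObjectExpr carrying the current protocol header and submits them in a single objects call. A minimal usage sketch — illustrative only; building the InsertExpr values themselves is elided here, and insert_many is a hypothetical helper, not part of this change:

async fn insert_many(db: &Database, exprs: Vec<InsertExpr>) -> Result<usize> {
    // One round trip carries all inserts; the returned results line up
    // with the input expressions, in order.
    let results = db.batch_insert(exprs).await?;
    Ok(results.len())
}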
@@ -92,6 +103,13 @@ impl Database {
         self.do_select(select_expr).await
     }
 
+    pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
+        let select_expr = SelectExpr {
+            expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
+        };
+        self.do_select(select_expr).await
+    }
+
     async fn do_select(&self, select_expr: SelectExpr) -> Result<ObjectResult> {
         let header = ExprHeader {
             version: PROTOCOL_VERSION,
@@ -222,12 +240,8 @@ impl TryFrom<ObjectResult> for Output {
}

fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
-    let wrapper = ColumnDataTypeWrapper::try_new(
-        column
-            .datatype
-            .context(MissingFieldSnafu { field: "datatype" })?,
-    )
-    .context(error::ColumnDataTypeSnafu)?;
+    let wrapper =
+        ColumnDataTypeWrapper::try_new(column.datatype).context(error::ColumnDataTypeSnafu)?;
    let column_datatype = wrapper.datatype();

    let rows = rows as usize;

@@ -330,7 +344,7 @@ mod tests {
    #[test]
    fn test_column_to_vector() {
        let mut column = create_test_column(Arc::new(BooleanVector::from(vec![true])));
-        column.datatype = Some(-100);
+        column.datatype = -100;
        let result = column_to_vector(&column, 1);
        assert!(result.is_err());
        assert_eq!(

@@ -408,7 +422,7 @@ mod tests {
            semantic_type: 1,
            values: Some(values(&[array.clone()]).unwrap()),
            null_mask: null_mask(&vec![array], vector.len()),
-            datatype: Some(wrapper.datatype() as i32),
+            datatype: wrapper.datatype() as i32,
        }
    }
}
@@ -85,6 +85,17 @@ pub enum Error {
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },
+
+    #[snafu(display(
+        "Failed to create gRPC channel, peer address: {}, source: {}",
+        addr,
+        source
+    ))]
+    CreateChannel {
+        addr: String,
+        #[snafu(backtrace)]
+        source: common_grpc::error::Error,
+    },
}

pub type Result<T> = std::result::Result<T, Error>;

@@ -107,6 +118,7 @@ impl ErrorExt for Error {
                source.status_code()
            }
            Error::CreateRecordBatches { source } => source.status_code(),
+            Error::CreateChannel { source, .. } => source.status_code(),
            Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
        }
    }
@@ -2,6 +2,9 @@ pub mod admin;
 mod client;
 mod database;
 mod error;
+pub mod load_balance;
+
+pub use api;
 
 pub use self::{
     client::Client,
src/client/src/load_balance.rs (new file, 52 lines)
@@ -0,0 +1,52 @@
use enum_dispatch::enum_dispatch;
use rand::seq::SliceRandom;

#[enum_dispatch]
pub trait LoadBalance {
    fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String>;
}

#[enum_dispatch(LoadBalance)]
#[derive(Debug)]
pub enum Loadbalancer {
    Random,
}

impl Default for Loadbalancer {
    fn default() -> Self {
        Loadbalancer::from(Random)
    }
}

#[derive(Debug)]
pub struct Random;

impl LoadBalance for Random {
    fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String> {
        peers.choose(&mut rand::thread_rng())
    }
}

#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::{LoadBalance, Random};

    #[test]
    fn test_random_lb() {
        let peers = vec![
            "127.0.0.1:3001".to_string(),
            "127.0.0.1:3002".to_string(),
            "127.0.0.1:3003".to_string(),
            "127.0.0.1:3004".to_string(),
        ];
        let all: HashSet<String> = peers.clone().into_iter().collect();

        let random = Random;
        for _ in 0..100 {
            let peer = random.get_peer(&peers).unwrap();
            assert!(all.contains(peer));
        }
    }
}
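The LoadBalance trait plus enum_dispatch keeps peer selection pluggable. As an illustration of how another policy could slot in — a hypothetical sketch, not part of this change; it would also need its own variant in Loadbalancer — a round-robin balancer only needs a cursor:

use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Debug, Default)]
pub struct RoundRobin {
    cursor: AtomicUsize,
}

impl LoadBalance for RoundRobin {
    fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String> {
        if peers.is_empty() {
            return None;
        }
        // Advance the shared cursor and wrap around the peer list.
        let i = self.cursor.fetch_add(1, Ordering::Relaxed) % peers.len();
        peers.get(i)
    }
}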
@@ -2,6 +2,7 @@
 name = "cmd"
 version = "0.1.0"
 edition = "2021"
+default-run = "greptime"
 
 [[bin]]
 name = "greptime"

@@ -10,10 +11,13 @@ path = "src/bin/greptime.rs"
 [dependencies]
 clap = { version = "3.1", features = ["derive"] }
 common-error = { path = "../common/error" }
-common-telemetry = { path = "../common/telemetry", features = ["deadlock_detection"] }
+common-telemetry = { path = "../common/telemetry", features = [
+    "deadlock_detection",
+] }
 datanode = { path = "../datanode" }
 frontend = { path = "../frontend" }
 futures = "0.3"
+meta-srv = { path = "../meta-srv" }
 snafu = { version = "0.7", features = ["backtraces"] }
 tokio = { version = "1.18", features = ["full"] }
 toml = "0.5"
@@ -4,7 +4,9 @@ use clap::Parser;
 use cmd::datanode;
 use cmd::error::Result;
 use cmd::frontend;
-use common_telemetry::{self, logging::error, logging::info};
+use cmd::metasrv;
+use common_telemetry::logging::error;
+use common_telemetry::logging::info;
 
 #[derive(Parser)]
 #[clap(name = "greptimedb")]

@@ -29,6 +31,8 @@ enum SubCommand {
     Datanode(datanode::Command),
     #[clap(name = "frontend")]
     Frontend(frontend::Command),
+    #[clap(name = "metasrv")]
+    Metasrv(metasrv::Command),
 }
 
 impl SubCommand {

@@ -36,6 +40,7 @@ impl SubCommand {
         match self {
             SubCommand::Datanode(cmd) => cmd.run().await,
             SubCommand::Frontend(cmd) => cmd.run().await,
+            SubCommand::Metasrv(cmd) => cmd.run().await,
         }
     }
 }

@@ -45,6 +50,7 @@ impl fmt::Display for SubCommand {
         match self {
             SubCommand::Datanode(..) => write!(f, "greptime-datanode"),
             SubCommand::Frontend(..) => write!(f, "greptime-frontend"),
+            SubCommand::Metasrv(..) => write!(f, "greptime-metasrv"),
         }
     }
 }
@@ -1,9 +1,9 @@
 use clap::Parser;
 use common_telemetry::logging;
-use datanode::datanode::{Datanode, DatanodeOptions};
+use datanode::datanode::{Datanode, DatanodeOptions, Mode};
 use snafu::ResultExt;
 
-use crate::error::{Error, Result, StartDatanodeSnafu};
+use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
 use crate::toml_loader;
 
 #[derive(Parser)]

@@ -33,6 +33,8 @@ impl SubCommand {
 
 #[derive(Debug, Parser)]
 struct StartCommand {
+    #[clap(long)]
+    node_id: Option<u64>,
     #[clap(long)]
     http_addr: Option<String>,
     #[clap(long)]

@@ -41,6 +43,8 @@ struct StartCommand {
     mysql_addr: Option<String>,
     #[clap(long)]
     postgres_addr: Option<String>,
+    #[clap(long)]
+    metasrv_addr: Option<String>,
     #[clap(short, long)]
     config_file: Option<String>,
 }
@@ -84,6 +88,31 @@ impl TryFrom<StartCommand> for DatanodeOptions {
             opts.postgres_addr = addr;
         }
 
+        match (cmd.metasrv_addr, cmd.node_id) {
+            (Some(meta_addr), Some(node_id)) => {
+                // Running mode is only set to Distributed when
+                // both metasrv addr and node id are set in
+                // commandline options
+                opts.meta_client_opts.metasrv_addr = meta_addr;
+                opts.node_id = node_id;
+                opts.mode = Mode::Distributed;
+            }
+            (None, None) => {
+                opts.mode = Mode::Standalone;
+            }
+            (None, Some(_)) => {
+                return MissingConfigSnafu {
+                    msg: "Missing metasrv address option",
+                }
+                .fail();
+            }
+            (Some(_), None) => {
+                return MissingConfigSnafu {
+                    msg: "Missing node id option",
+                }
+                .fail();
+            }
+        }
         Ok(opts)
     }
 }
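The mode-selection contract above reads off directly: both options together select distributed mode, neither selects standalone, and a lone option is rejected. A hypothetical sketch of the equivalent invocation — the kebab-case flag spelling ("greptime datanode start --metasrv-addr 127.0.0.1:3002 --node-id 42") is an assumption about clap 3's derive defaults, and distributed_opts is an illustrative helper:

fn distributed_opts() -> Result<DatanodeOptions> {
    // Equivalent to passing both --metasrv-addr and --node-id on the CLI.
    let opts = DatanodeOptions::try_from(StartCommand {
        node_id: Some(42),
        http_addr: None,
        rpc_addr: None,
        mysql_addr: None,
        postgres_addr: None,
        metasrv_addr: Some("127.0.0.1:3002".to_string()),
        config_file: None,
    })?;
    assert_eq!(Mode::Distributed, opts.mode);
    Ok(opts)
}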
@@ -97,10 +126,12 @@ mod tests {
     #[test]
     fn test_read_from_config_file() {
         let cmd = StartCommand {
+            node_id: None,
             http_addr: None,
             rpc_addr: None,
             mysql_addr: None,
             postgres_addr: None,
+            metasrv_addr: None,
             config_file: Some(format!(
                 "{}/../../config/datanode.example.toml",
                 std::env::current_dir().unwrap().as_path().to_str().unwrap()

@@ -112,6 +143,13 @@ mod tests {
         assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
         assert_eq!("0.0.0.0:3306".to_string(), options.mysql_addr);
         assert_eq!(4, options.mysql_runtime_size);
+        assert_eq!(
+            "1.1.1.1:3002".to_string(),
+            options.meta_client_opts.metasrv_addr
+        );
+        assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
+        assert_eq!(3000, options.meta_client_opts.timeout_millis);
+        assert!(options.meta_client_opts.tcp_nodelay);
 
         assert_eq!("0.0.0.0:5432".to_string(), options.postgres_addr);
         assert_eq!(4, options.postgres_runtime_size);

@@ -122,4 +160,58 @@ mod tests {
             }
         };
     }
+
+    #[test]
+    fn test_try_from_cmd() {
+        assert_eq!(
+            Mode::Standalone,
+            DatanodeOptions::try_from(StartCommand {
+                node_id: None,
+                http_addr: None,
+                rpc_addr: None,
+                mysql_addr: None,
+                postgres_addr: None,
+                metasrv_addr: None,
+                config_file: None
+            })
+            .unwrap()
+            .mode
+        );
+
+        assert_eq!(
+            Mode::Distributed,
+            DatanodeOptions::try_from(StartCommand {
+                node_id: Some(42),
+                http_addr: None,
+                rpc_addr: None,
+                mysql_addr: None,
+                postgres_addr: None,
+                metasrv_addr: Some("127.0.0.1:3002".to_string()),
+                config_file: None
+            })
+            .unwrap()
+            .mode
+        );
+
+        assert!(DatanodeOptions::try_from(StartCommand {
+            node_id: None,
+            http_addr: None,
+            rpc_addr: None,
+            mysql_addr: None,
+            postgres_addr: None,
+            metasrv_addr: Some("127.0.0.1:3002".to_string()),
+            config_file: None,
+        })
+        .is_err());
+        assert!(DatanodeOptions::try_from(StartCommand {
+            node_id: Some(42),
+            http_addr: None,
+            rpc_addr: None,
+            mysql_addr: None,
+            postgres_addr: None,
+            metasrv_addr: None,
+            config_file: None,
+        })
+        .is_err());
+    }
 }
@@ -17,14 +17,27 @@ pub enum Error {
         source: frontend::error::Error,
     },
 
+    #[snafu(display("Failed to start meta server, source: {}", source))]
+    StartMetaServer {
+        #[snafu(backtrace)]
+        source: meta_srv::error::Error,
+    },
+
     #[snafu(display("Failed to read config file: {}, source: {}", path, source))]
     ReadConfig {
-        source: std::io::Error,
         path: String,
+        source: std::io::Error,
+        backtrace: Backtrace,
     },
 
     #[snafu(display("Failed to parse config, source: {}", source))]
-    ParseConfig { source: toml::de::Error },
+    ParseConfig {
+        source: toml::de::Error,
+        backtrace: Backtrace,
+    },
+
+    #[snafu(display("Missing config, msg: {}", msg))]
+    MissingConfig { msg: String, backtrace: Backtrace },
 }
 
 pub type Result<T> = std::result::Result<T, Error>;

@@ -34,7 +47,10 @@ impl ErrorExt for Error {
         match self {
             Error::StartDatanode { source } => source.status_code(),
             Error::StartFrontend { source } => source.status_code(),
-            Error::ReadConfig { .. } | Error::ParseConfig { .. } => StatusCode::InvalidArguments,
+            Error::StartMetaServer { source } => source.status_code(),
+            Error::ReadConfig { .. } | Error::ParseConfig { .. } | Error::MissingConfig { .. } => {
+                StatusCode::InvalidArguments
+            }
         }
     }
@@ -51,18 +67,68 @@ impl ErrorExt for Error {
 mod tests {
     use super::*;
 
-    fn raise_read_config_error() -> std::result::Result<(), std::io::Error> {
-        Err(std::io::ErrorKind::NotFound.into())
+    type StdResult<E> = std::result::Result<(), E>;
+
+    #[test]
+    fn test_start_node_error() {
+        fn throw_datanode_error() -> StdResult<datanode::error::Error> {
+            datanode::error::MissingFieldSnafu {
+                field: "test_field",
+            }
+            .fail()
+        }
+
+        let e = throw_datanode_error()
+            .context(StartDatanodeSnafu)
+            .err()
+            .unwrap();
+
+        assert!(e.backtrace_opt().is_some());
+        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
     }
 
     #[test]
-    fn test_error() {
-        let e = raise_read_config_error()
+    fn test_start_frontend_error() {
+        fn throw_frontend_error() -> StdResult<frontend::error::Error> {
+            frontend::error::InvalidSqlSnafu { err_msg: "failed" }.fail()
+        }
+
+        let e = throw_frontend_error()
+            .context(StartFrontendSnafu)
+            .err()
+            .unwrap();
+
+        assert!(e.backtrace_opt().is_some());
+        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
+    }
+
+    #[test]
+    fn test_start_metasrv_error() {
+        fn throw_metasrv_error() -> StdResult<meta_srv::error::Error> {
+            meta_srv::error::StreamNoneSnafu {}.fail()
+        }
+
+        let e = throw_metasrv_error()
+            .context(StartMetaServerSnafu)
+            .err()
+            .unwrap();
+
+        assert!(e.backtrace_opt().is_some());
+        assert_eq!(e.status_code(), StatusCode::Internal);
+    }
+
+    #[test]
+    fn test_read_config_error() {
+        fn throw_read_config_error() -> StdResult<std::io::Error> {
+            Err(std::io::ErrorKind::NotFound.into())
+        }
+
+        let e = throw_read_config_error()
             .context(ReadConfigSnafu { path: "test" })
             .err()
             .unwrap();
 
-        assert!(e.backtrace_opt().is_none());
+        assert!(e.backtrace_opt().is_some());
         assert_eq!(e.status_code(), StatusCode::InvalidArguments);
     }
 }
@@ -1,6 +1,8 @@
 use clap::Parser;
 use frontend::frontend::{Frontend, FrontendOptions};
+use frontend::grpc::GrpcOptions;
 use frontend::influxdb::InfluxdbOptions;
+use frontend::instance::Instance;
 use frontend::mysql::MysqlOptions;
 use frontend::opentsdb::OpentsdbOptions;
 use frontend::postgres::PostgresOptions;

@@ -35,7 +37,7 @@ impl SubCommand {
 }
 
 #[derive(Debug, Parser)]
-struct StartCommand {
+pub struct StartCommand {
     #[clap(long)]
     http_addr: Option<String>,
     #[clap(long)]

@@ -55,7 +57,7 @@ struct StartCommand {
 impl StartCommand {
     async fn run(self) -> Result<()> {
         let opts = self.try_into()?;
-        let mut frontend = Frontend::new(opts);
+        let mut frontend = Frontend::new(opts, Instance::new());
         frontend.start().await.context(error::StartFrontendSnafu)
     }
 }

@@ -74,7 +76,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
             opts.http_addr = Some(addr);
         }
         if let Some(addr) = cmd.grpc_addr {
-            opts.grpc_addr = Some(addr);
+            opts.grpc_options = Some(GrpcOptions {
+                addr,
+                ..Default::default()
+            });
         }
         if let Some(addr) = cmd.mysql_addr {
             opts.mysql_options = Some(MysqlOptions {

@@ -130,7 +135,10 @@ mod tests {
         );
 
         let default_opts = FrontendOptions::default();
-        assert_eq!(opts.grpc_addr, default_opts.grpc_addr);
+        assert_eq!(
+            opts.grpc_options.unwrap().addr,
+            default_opts.grpc_options.unwrap().addr
+        );
         assert_eq!(
             opts.mysql_options.as_ref().unwrap().runtime_size,
             default_opts.mysql_options.as_ref().unwrap().runtime_size
@@ -1,4 +1,5 @@
 pub mod datanode;
 pub mod error;
 pub mod frontend;
+pub mod metasrv;
 mod toml_loader;
src/cmd/src/metasrv.rs (new file, 122 lines)
@@ -0,0 +1,122 @@
use clap::Parser;
use common_telemetry::logging;
use meta_srv::bootstrap;
use meta_srv::metasrv::MetaSrvOptions;
use snafu::ResultExt;

use crate::error;
use crate::error::Error;
use crate::error::Result;
use crate::toml_loader;

#[derive(Parser)]
pub struct Command {
    #[clap(subcommand)]
    subcmd: SubCommand,
}

impl Command {
    pub async fn run(self) -> Result<()> {
        self.subcmd.run().await
    }
}

#[derive(Parser)]
enum SubCommand {
    Start(StartCommand),
}

impl SubCommand {
    async fn run(self) -> Result<()> {
        match self {
            SubCommand::Start(cmd) => cmd.run().await,
        }
    }
}

#[derive(Debug, Parser)]
struct StartCommand {
    #[clap(long)]
    bind_addr: Option<String>,
    #[clap(long)]
    server_addr: Option<String>,
    #[clap(long)]
    store_addr: Option<String>,
    #[clap(short, long)]
    config_file: Option<String>,
}

impl StartCommand {
    async fn run(self) -> Result<()> {
        logging::info!("MetaSrv start command: {:#?}", self);

        let opts: MetaSrvOptions = self.try_into()?;

        logging::info!("MetaSrv options: {:#?}", opts);

        bootstrap::bootstrap_meta_srv(opts)
            .await
            .context(error::StartMetaServerSnafu)
    }
}

impl TryFrom<StartCommand> for MetaSrvOptions {
    type Error = Error;

    fn try_from(cmd: StartCommand) -> Result<Self> {
        let mut opts: MetaSrvOptions = if let Some(path) = cmd.config_file {
            toml_loader::from_file!(&path)?
        } else {
            MetaSrvOptions::default()
        };

        if let Some(addr) = cmd.bind_addr {
            opts.bind_addr = addr;
        }
        if let Some(addr) = cmd.server_addr {
            opts.server_addr = addr;
        }
        if let Some(addr) = cmd.store_addr {
            opts.store_addr = addr;
        }

        Ok(opts)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_read_from_cmd() {
        let cmd = StartCommand {
            bind_addr: Some("127.0.0.1:3002".to_string()),
            server_addr: Some("0.0.0.0:3002".to_string()),
            store_addr: Some("127.0.0.1:2380".to_string()),
            config_file: None,
        };
        let options: MetaSrvOptions = cmd.try_into().unwrap();
        assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
        assert_eq!("0.0.0.0:3002".to_string(), options.server_addr);
        assert_eq!("127.0.0.1:2380".to_string(), options.store_addr);
    }

    #[test]
    fn test_read_from_config_file() {
        let cmd = StartCommand {
            bind_addr: None,
            server_addr: None,
            store_addr: None,
            config_file: Some(format!(
                "{}/../../config/metasrv.example.toml",
                std::env::current_dir().unwrap().as_path().to_str().unwrap()
            )),
        };
        let options: MetaSrvOptions = cmd.try_into().unwrap();
        assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
        assert_eq!("0.0.0.0:3002".to_string(), options.server_addr);
        assert_eq!("127.0.0.1:2380".to_string(), options.store_addr);
        assert_eq!(30, options.datanode_lease_secs);
    }
}
src/common/catalog/Cargo.toml (new file, 22 lines)
@@ -0,0 +1,22 @@
[package]
name = "common-catalog"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
async-trait = "0.1"
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
datatypes = { path = "../../datatypes" }
lazy_static = "1.4"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }

[dev-dependencies]
chrono = "0.4"
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }
@@ -11,3 +11,9 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
 pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
 /// scripts table id
 pub const SCRIPTS_TABLE_ID: u32 = 1;
+
+pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
+pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
+pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
+pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
+pub const TABLE_ID_KEY_PREFIX: &str = "__tid";
src/common/catalog/src/error.rs (new file, 49 lines)
@@ -0,0 +1,49 @@
use std::any::Any;

use common_error::ext::ErrorExt;
use common_error::prelude::{Snafu, StatusCode};
use snafu::{Backtrace, ErrorCompat};

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Invalid catalog info: {}", key))]
    InvalidCatalog { key: String, backtrace: Backtrace },

    #[snafu(display("Failed to deserialize catalog entry value: {}", raw))]
    DeserializeCatalogEntryValue {
        raw: String,
        backtrace: Backtrace,
        source: serde_json::error::Error,
    },

    #[snafu(display("Failed to serialize catalog entry value"))]
    SerializeCatalogEntryValue {
        backtrace: Backtrace,
        source: serde_json::error::Error,
    },

    #[snafu(display("Failed to parse node id: {}", key))]
    ParseNodeId { key: String, backtrace: Backtrace },
}

impl ErrorExt for Error {
    fn status_code(&self) -> StatusCode {
        match self {
            Error::InvalidCatalog { .. }
            | Error::DeserializeCatalogEntryValue { .. }
            | Error::SerializeCatalogEntryValue { .. } => StatusCode::Unexpected,
            Error::ParseNodeId { .. } => StatusCode::InvalidArguments,
        }
    }

    fn backtrace_opt(&self) -> Option<&Backtrace> {
        ErrorCompat::backtrace(self)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}

pub type Result<T> = std::result::Result<T, Error>;
src/common/catalog/src/helper.rs (new file, 340 lines)
@@ -0,0 +1,340 @@
use std::collections::HashMap;
use std::fmt::{Display, Formatter};

use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableMeta, TableId, TableVersion};

use crate::consts::{
    CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
};
use crate::error::{
    DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};

lazy_static! {
    static ref CATALOG_KEY_PATTERN: Regex =
        Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
}

lazy_static! {
    static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
        SCHEMA_KEY_PREFIX
    ))
    .unwrap();
}

lazy_static! {
    static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)$",
        TABLE_GLOBAL_KEY_PREFIX
    ))
    .unwrap();
}

lazy_static! {
    static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
        "^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
        TABLE_REGIONAL_KEY_PREFIX
    ))
    .unwrap();
}

pub fn build_catalog_prefix() -> String {
    format!("{}-", CATALOG_KEY_PREFIX)
}

pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
    format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
}

pub fn build_table_global_prefix(
    catalog_name: impl AsRef<str>,
    schema_name: impl AsRef<str>,
) -> String {
    format!(
        "{}-{}-{}-",
        TABLE_GLOBAL_KEY_PREFIX,
        catalog_name.as_ref(),
        schema_name.as_ref()
    )
}

pub fn build_table_regional_prefix(
    catalog_name: impl AsRef<str>,
    schema_name: impl AsRef<str>,
) -> String {
    format!(
        "{}-{}-{}-",
        TABLE_REGIONAL_KEY_PREFIX,
        catalog_name.as_ref(),
        schema_name.as_ref()
    )
}

/// Table global info has only one key across all datanodes, so it does not have a `node_id` field.
pub struct TableGlobalKey {
    pub catalog_name: String,
    pub schema_name: String,
    pub table_name: String,
}

impl Display for TableGlobalKey {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
        f.write_str("-")?;
        f.write_str(&self.catalog_name)?;
        f.write_str("-")?;
        f.write_str(&self.schema_name)?;
        f.write_str("-")?;
        f.write_str(&self.table_name)
    }
}

impl TableGlobalKey {
    pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
        let key = s.as_ref();
        let captures = TABLE_GLOBAL_KEY_PATTERN
            .captures(key)
            .context(InvalidCatalogSnafu { key })?;
        ensure!(captures.len() == 4, InvalidCatalogSnafu { key });

        Ok(Self {
            catalog_name: captures[1].to_string(),
            schema_name: captures[2].to_string(),
            table_name: captures[3].to_string(),
        })
    }
}

/// Table global info contains the info necessary for a datanode to create table regions, including
/// the table id, table meta (schema, ...), and the region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TableGlobalValue {
    /// Table id is the same across all datanodes.
    pub id: TableId,
    /// Id of the datanode that created the global table info kv; for debugging only.
    pub node_id: u64,
    /// Allocation of region ids across all datanodes: node id -> region ids.
    pub regions_id_map: HashMap<u64, Vec<u32>>,
    pub meta: RawTableMeta,
    /// Partition rules for the table.
    pub partition_rules: String,
}

/// Table regional info varies between datanodes, so its key contains a `node_id` field.
pub struct TableRegionalKey {
    pub catalog_name: String,
    pub schema_name: String,
    pub table_name: String,
    pub node_id: u64,
}

impl Display for TableRegionalKey {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
        f.write_str("-")?;
        f.write_str(&self.catalog_name)?;
        f.write_str("-")?;
        f.write_str(&self.schema_name)?;
        f.write_str("-")?;
        f.write_str(&self.table_name)?;
        f.write_str("-")?;
        f.serialize_u64(self.node_id)
    }
}

impl TableRegionalKey {
    pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
        let key = s.as_ref();
        let captures = TABLE_REGIONAL_KEY_PATTERN
            .captures(key)
            .context(InvalidCatalogSnafu { key })?;
        ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
        let node_id = captures[4]
            .to_string()
            .parse()
            .map_err(|_| InvalidCatalogSnafu { key }.build())?;
        Ok(Self {
            catalog_name: captures[1].to_string(),
            schema_name: captures[2].to_string(),
            table_name: captures[3].to_string(),
            node_id,
        })
    }
}

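These keys are plain `-`-separated strings, so the Display impls and the regex-based parse methods must stay in sync. An illustrative round-trip sketch (not part of this change):

#[test]
fn table_global_key_roundtrip() {
    let key = TableGlobalKey {
        catalog_name: "greptime".to_string(),
        schema_name: "public".to_string(),
        table_name: "demo".to_string(),
    };
    // Display yields "__tg-greptime-public-demo"; parse inverts it.
    let parsed = TableGlobalKey::parse(key.to_string()).unwrap();
    assert_eq!("demo", parsed.table_name);
}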
/// Regional table info of a specific datanode, including the table version on that datanode and
/// the region ids allocated by metasrv.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TableRegionalValue {
    pub version: TableVersion,
    pub regions_ids: Vec<u32>,
}

pub struct CatalogKey {
    pub catalog_name: String,
}

impl Display for CatalogKey {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(CATALOG_KEY_PREFIX)?;
        f.write_str("-")?;
        f.write_str(&self.catalog_name)
    }
}

impl CatalogKey {
    pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
        let key = s.as_ref();
        let captures = CATALOG_KEY_PATTERN
            .captures(key)
            .context(InvalidCatalogSnafu { key })?;
        ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
        Ok(Self {
            catalog_name: captures[1].to_string(),
        })
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub struct CatalogValue;

pub struct SchemaKey {
    pub catalog_name: String,
    pub schema_name: String,
}

impl Display for SchemaKey {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(SCHEMA_KEY_PREFIX)?;
        f.write_str("-")?;
        f.write_str(&self.catalog_name)?;
        f.write_str("-")?;
        f.write_str(&self.schema_name)
    }
}

impl SchemaKey {
    pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
        let key = s.as_ref();
        let captures = SCHEMA_KEY_PATTERN
            .captures(key)
            .context(InvalidCatalogSnafu { key })?;
        ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
        Ok(Self {
            catalog_name: captures[1].to_string(),
            schema_name: captures[2].to_string(),
        })
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub struct SchemaValue;

macro_rules! define_catalog_value {
    ( $($val_ty: ty), *) => {
        $(
            impl $val_ty {
                pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
                    serde_json::from_str(s.as_ref())
                        .context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
                }

                pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
                    Ok(serde_json::to_string(self)
                        .context(SerializeCatalogEntryValueSnafu)?
                        .into_bytes())
                }
            }
        )*
    }
}

define_catalog_value!(
    TableRegionalValue,
    TableGlobalValue,
    CatalogValue,
    SchemaValue
);

#[cfg(test)]
mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::{ColumnSchema, RawSchema, Schema};

    use super::*;

    #[test]
    fn test_parse_catalog_key() {
        let key = "__c-C";
        let catalog_key = CatalogKey::parse(key).unwrap();
        assert_eq!("C", catalog_key.catalog_name);
        assert_eq!(key, catalog_key.to_string());
    }

    #[test]
    fn test_parse_schema_key() {
        let key = "__s-C-S";
        let schema_key = SchemaKey::parse(key).unwrap();
        assert_eq!("C", schema_key.catalog_name);
        assert_eq!("S", schema_key.schema_name);
        assert_eq!(key, schema_key.to_string());
    }

    #[test]
    fn test_parse_table_key() {
        let key = "__tg-C-S-T";
        let entry = TableGlobalKey::parse(key).unwrap();
        assert_eq!("C", entry.catalog_name);
        assert_eq!("S", entry.schema_name);
        assert_eq!("T", entry.table_name);
        assert_eq!(key, &entry.to_string());
    }

    #[test]
    fn test_build_prefix() {
        assert_eq!("__c-", build_catalog_prefix());
        assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
        assert_eq!(
            "__tg-CATALOG-SCHEMA-",
            build_table_global_prefix("CATALOG", "SCHEMA")
        );
    }

    #[test]
    fn test_serialize_schema() {
        let schema = Schema::new(vec![ColumnSchema::new(
            "name",
            ConcreteDataType::string_datatype(),
            true,
        )]);

        let meta = RawTableMeta {
            schema: RawSchema::from(&schema),
            engine: "mito".to_string(),
            created_on: chrono::DateTime::default(),
            primary_key_indices: vec![0, 1],
            next_column_id: 3,
            engine_options: Default::default(),
            value_indices: vec![2, 3],
            options: Default::default(),
            region_numbers: vec![1],
        };

        let value = TableGlobalValue {
            id: 42,
            node_id: 0,
            regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
            meta,
            partition_rules: "{}".to_string(),
        };
        let serialized = serde_json::to_string(&value).unwrap();
        let deserialized = TableGlobalValue::parse(&serialized).unwrap();
        assert_eq!(value, deserialized);
    }
}
src/common/catalog/src/lib.rs (new file, 9 lines)
@@ -0,0 +1,9 @@
pub mod consts;
pub mod error;
mod helper;

pub use helper::{
    build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
    build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
    TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
@@ -7,12 +7,12 @@ edition = "2021"
 proc-macro = true
 
 [dependencies]
-common-query = { path = "../query" }
-datatypes = { path = "../../datatypes" }
 quote = "1.0"
-snafu = { version = "0.7", features = ["backtraces"] }
 syn = "1.0"
 
 [dev-dependencies]
 arc-swap = "1.0"
+common-query = { path = "../query" }
+datatypes = { path = "../../datatypes" }
+snafu = { version = "0.7", features = ["backtraces"] }
 static_assertions = "1.1.0"
@@ -8,6 +8,7 @@ arc-swap = "1.0"
 chrono-tz = "0.6"
 common-error = { path = "../error" }
 common-function-macro = { path = "../function-macro" }
+common-time = { path = "../time" }
 common-query = { path = "../query" }
 datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
 datatypes = { path = "../../datatypes" }
@@ -6,6 +6,7 @@ pub mod math;
 pub mod numpy;
 #[cfg(test)]
 pub(crate) mod test;
+mod timestamp;
 pub mod udf;
 
 pub use aggregate::MedianAccumulatorCreator;
@@ -9,6 +9,7 @@ use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
 use crate::scalars::function::FunctionRef;
 use crate::scalars::math::MathFunction;
 use crate::scalars::numpy::NumpyFunction;
+use crate::scalars::timestamp::TimestampFunction;
 
 #[derive(Default)]
 pub struct FunctionRegistry {

@@ -31,6 +32,10 @@ impl FunctionRegistry {
             .insert(func.name(), func);
     }
 
+    pub fn get_aggr_function(&self, name: &str) -> Option<AggregateFunctionMetaRef> {
+        self.aggregate_functions.read().unwrap().get(name).cloned()
+    }
+
     pub fn get_function(&self, name: &str) -> Option<FunctionRef> {
         self.functions.read().unwrap().get(name).cloned()
     }

@@ -54,6 +59,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
 
     MathFunction::register(&function_registry);
     NumpyFunction::register(&function_registry);
+    TimestampFunction::register(&function_registry);
 
     AggregateFunctions::register(&function_registry);
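Once registered, lookups go through the global registry; scalar and aggregate functions live in separate maps, each with its own accessor. An illustrative sketch (assumes the FUNCTION_REGISTRY static shown above is in scope; the aggregate name "median" is an assumption for illustration):

fn lookup_examples() {
    let f = FUNCTION_REGISTRY
        .get_function("from_unixtime")
        .expect("registered at startup");
    assert_eq!("from_unixtime", f.name());

    // Aggregates are resolved through the separate aggregate map.
    let _median = FUNCTION_REGISTRY.get_aggr_function("median");
}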
src/common/function/src/scalars/timestamp/from_unixtime.rs (new file, 116 lines)
@@ -0,0 +1,116 @@
//! from_unixtime function.
//! TODO(dennis): it can be removed after we upgrade datafusion.
use std::fmt;
use std::sync::Arc;

use arrow::compute::arithmetics;
use arrow::datatypes::DataType as ArrowDatatype;
use arrow::scalar::PrimitiveScalar;
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::TimestampVector;
use datatypes::vectors::VectorRef;
use snafu::ResultExt;

use crate::error::Result;
use crate::scalars::function::{Function, FunctionContext};

#[derive(Clone, Debug, Default)]
pub struct FromUnixtimeFunction;

const NAME: &str = "from_unixtime";

impl Function for FromUnixtimeFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::timestamp_millis_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::uniform(
            1,
            vec![ConcreteDataType::int64_datatype()],
            Volatility::Immutable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        match columns[0].data_type() {
            ConcreteDataType::Int64(_) => {
                let array = columns[0].to_arrow_array();
                // Our timestamp vector's time unit is millisecond,
                // so scale the unix seconds by 1000.
                let array = arithmetics::mul_scalar(
                    &*array,
                    &PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
                );

                Ok(Arc::new(
                    TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
                        data_type: ArrowDatatype::Int64,
                    })?,
                ))
            }
            _ => UnsupportedInputDataTypeSnafu {
                function: NAME,
                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
            }
            .fail()
            .map_err(|e| e.into()),
        }
    }
}

impl fmt::Display for FromUnixtimeFunction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "FROM_UNIXTIME")
    }
}

#[cfg(test)]
mod tests {
    use common_query::prelude::TypeSignature;
    use datatypes::value::Value;
    use datatypes::vectors::Int64Vector;

    use super::*;

    #[test]
    fn test_from_unixtime() {
        let f = FromUnixtimeFunction::default();
        assert_eq!("from_unixtime", f.name());
        assert_eq!(
            ConcreteDataType::timestamp_millis_datatype(),
            f.return_type(&[]).unwrap()
        );

        assert!(matches!(f.signature(),
            Signature {
                type_signature: TypeSignature::Uniform(1, valid_types),
                volatility: Volatility::Immutable
            } if valid_types == vec![ConcreteDataType::int64_datatype()]
        ));

        let times = vec![Some(1494410783), None, Some(1494410983)];
        let args: Vec<VectorRef> = vec![Arc::new(Int64Vector::from(times.clone()))];

        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(3, vector.len());
        for (i, t) in times.iter().enumerate() {
            let v = vector.get(i);
            if i == 1 {
                assert_eq!(Value::Null, v);
                continue;
            }
            match v {
                Value::Timestamp(ts) => {
                    assert_eq!(ts.value(), t.unwrap() * 1000);
                }
                _ => unreachable!(),
            }
        }
    }
}
src/common/function/src/scalars/timestamp/mod.rs (new file, 14 lines)
@@ -0,0 +1,14 @@
use std::sync::Arc;

mod from_unixtime;

use from_unixtime::FromUnixtimeFunction;

use crate::scalars::function_registry::FunctionRegistry;

pub(crate) struct TimestampFunction;

impl TimestampFunction {
    pub fn register(registry: &FunctionRegistry) {
        registry.register(Arc::new(FromUnixtimeFunction::default()));
    }
}
@@ -8,10 +8,23 @@ api = { path = "../../api" }
 async-trait = "0.1"
 common-base = { path = "../base" }
 common-error = { path = "../error" }
+common-runtime = { path = "../runtime" }
+dashmap = "5.4"
 datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
 snafu = { version = "0.7", features = ["backtraces"] }
 tokio = { version = "1.0", features = ["full"] }
+tonic = "0.8"
+tower = "0.4"
 
 [dependencies.arrow]
 package = "arrow2"
 version = "0.10"
+features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
+
+[dev-dependencies]
+criterion = "0.4"
+rand = "0.8"
+
+[[bench]]
+name = "bench_main"
+harness = false
src/common/grpc/benches/bench_main.rs (new file, 7 lines)
@@ -0,0 +1,7 @@
use criterion::criterion_main;

mod channel_manager;

criterion_main! {
    channel_manager::benches
}
src/common/grpc/benches/channel_manager.rs (new file, 34 lines)
@@ -0,0 +1,34 @@
use common_grpc::channel_manager::ChannelManager;
use criterion::{criterion_group, criterion_main, Criterion};

#[tokio::main]
async fn do_bench_channel_manager() {
    let m = ChannelManager::new();
    let task_count = 8;
    let mut joins = Vec::with_capacity(task_count);

    for _ in 0..task_count {
        let m_clone = m.clone();
        let join = tokio::spawn(async move {
            for _ in 0..10000 {
                let idx = rand::random::<usize>() % 100;
                let ret = m_clone.get(format!("{}", idx));
                assert!(ret.is_ok());
            }
        });
        joins.push(join);
    }

    for join in joins {
        let _ = join.await;
    }
}

fn bench_channel_manager(c: &mut Criterion) {
    c.bench_function("bench channel manager", |b| {
        b.iter(do_bench_channel_manager);
    });
}

criterion_group!(benches, bench_channel_manager);
criterion_main!(benches);
src/common/grpc/src/channel_manager.rs (new file, 534 lines)
@@ -0,0 +1,534 @@
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;

use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use snafu::ResultExt;
use tonic::transport::Channel as InnerChannel;
use tonic::transport::Endpoint;
use tonic::transport::Uri;
use tower::make::MakeConnection;

use crate::error;
use crate::error::Result;

const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;

#[derive(Clone, Debug)]
pub struct ChannelManager {
    config: ChannelConfig,
    pool: Arc<Pool>,
}

impl Default for ChannelManager {
    fn default() -> Self {
        ChannelManager::with_config(ChannelConfig::default())
    }
}

impl ChannelManager {
    pub fn new() -> Self {
        Default::default()
    }

    pub fn with_config(config: ChannelConfig) -> Self {
        let pool = Arc::new(Pool::default());
        let cloned_pool = pool.clone();

        common_runtime::spawn_bg(async {
            recycle_channel_in_loop(cloned_pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
        });

        Self { config, pool }
    }

    pub fn config(&self) -> &ChannelConfig {
        &self.config
    }

    pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
        let addr = addr.as_ref();
        // It will acquire the read lock.
        if let Some(inner_ch) = self.pool.get(addr) {
            return Ok(inner_ch);
        }

        // It will acquire the write lock.
        let entry = match self.pool.entry(addr.to_string()) {
            Entry::Occupied(entry) => {
                entry.get().increase_access();
                entry.into_ref()
            }
            Entry::Vacant(entry) => {
                let endpoint = self.build_endpoint(addr)?;
                let inner_channel = endpoint.connect_lazy();

                let channel = Channel {
                    channel: inner_channel,
                    access: AtomicUsize::new(1),
                    use_default_connector: true,
                };
                entry.insert(channel)
            }
        };
        Ok(entry.channel.clone())
    }

    pub fn reset_with_connector<C>(
        &self,
        addr: impl AsRef<str>,
        connector: C,
    ) -> Result<InnerChannel>
    where
        C: MakeConnection<Uri> + Send + 'static,
        C::Connection: Unpin + Send + 'static,
        C::Future: Send + 'static,
        Box<dyn std::error::Error + Send + Sync>: From<C::Error> + Send + 'static,
    {
        let addr = addr.as_ref();
        let endpoint = self.build_endpoint(addr)?;
        let inner_channel = endpoint.connect_with_connector_lazy(connector);
        let channel = Channel {
            channel: inner_channel.clone(),
            access: AtomicUsize::new(1),
            use_default_connector: false,
        };
        self.pool.put(addr, channel);

        Ok(inner_channel)
    }

    pub fn retain_channel<F>(&self, f: F)
    where
        F: FnMut(&String, &mut Channel) -> bool,
    {
        self.pool.retain_channel(f);
    }

    fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
        let mut endpoint =
            Endpoint::new(format!("http://{}", addr)).context(error::CreateChannelSnafu)?;

        if let Some(dur) = self.config.timeout {
            endpoint = endpoint.timeout(dur);
        }
        if let Some(dur) = self.config.connect_timeout {
            endpoint = endpoint.connect_timeout(dur);
        }
        if let Some(limit) = self.config.concurrency_limit {
            endpoint = endpoint.concurrency_limit(limit);
        }
        if let Some((limit, dur)) = self.config.rate_limit {
            endpoint = endpoint.rate_limit(limit, dur);
        }
        if let Some(size) = self.config.initial_stream_window_size {
            endpoint = endpoint.initial_stream_window_size(size);
        }
        if let Some(size) = self.config.initial_connection_window_size {
            endpoint = endpoint.initial_connection_window_size(size);
        }
        if let Some(dur) = self.config.http2_keep_alive_interval {
            endpoint = endpoint.http2_keep_alive_interval(dur);
        }
        if let Some(dur) = self.config.http2_keep_alive_timeout {
            endpoint = endpoint.keep_alive_timeout(dur);
        }
        if let Some(enabled) = self.config.http2_keep_alive_while_idle {
            endpoint = endpoint.keep_alive_while_idle(enabled);
        }
        if let Some(enabled) = self.config.http2_adaptive_window {
            endpoint = endpoint.http2_adaptive_window(enabled);
        }
        endpoint = endpoint
            .tcp_keepalive(self.config.tcp_keepalive)
            .tcp_nodelay(self.config.tcp_nodelay);

        Ok(endpoint)
    }
}
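A sketch of how a caller would tune the pool, using only the ChannelConfig builder methods defined just below — tuned_channel is an illustrative helper, not part of this change:

use std::time::Duration;

fn tuned_channel(addr: &str) -> Result<InnerChannel> {
    // Each distinct address maps to one lazily-connected channel,
    // shared by every later get() for that address.
    let config = ChannelConfig::new()
        .timeout(Duration::from_secs(3))
        .connect_timeout(Duration::from_secs(1))
        .tcp_nodelay(true);
    let manager = ChannelManager::with_config(config);
    manager.get(addr)
}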
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ChannelConfig {
    pub timeout: Option<Duration>,
    pub connect_timeout: Option<Duration>,
    pub concurrency_limit: Option<usize>,
    pub rate_limit: Option<(u64, Duration)>,
    pub initial_stream_window_size: Option<u32>,
    pub initial_connection_window_size: Option<u32>,
    pub http2_keep_alive_interval: Option<Duration>,
    pub http2_keep_alive_timeout: Option<Duration>,
    pub http2_keep_alive_while_idle: Option<bool>,
    pub http2_adaptive_window: Option<bool>,
    pub tcp_keepalive: Option<Duration>,
    pub tcp_nodelay: bool,
}

impl Default for ChannelConfig {
    fn default() -> Self {
        Self {
            timeout: None,
            connect_timeout: None,
            concurrency_limit: None,
            rate_limit: None,
            initial_stream_window_size: None,
            initial_connection_window_size: None,
            http2_keep_alive_interval: None,
            http2_keep_alive_timeout: None,
            http2_keep_alive_while_idle: None,
            http2_adaptive_window: None,
            tcp_keepalive: None,
            tcp_nodelay: true,
        }
    }
}

impl ChannelConfig {
    pub fn new() -> Self {
        Default::default()
    }

    /// Applies a timeout to each request.
    pub fn timeout(self, timeout: Duration) -> Self {
        Self {
            timeout: Some(timeout),
            ..self
        }
    }

    /// Applies a timeout to connecting to the URI.
    ///
    /// Defaults to no timeout.
    pub fn connect_timeout(self, timeout: Duration) -> Self {
        Self {
            connect_timeout: Some(timeout),
            ..self
        }
    }

    /// Applies a concurrency limit to each request.
    pub fn concurrency_limit(self, limit: usize) -> Self {
        Self {
            concurrency_limit: Some(limit),
            ..self
        }
    }

    /// Applies a rate limit to each request.
    pub fn rate_limit(self, limit: u64, duration: Duration) -> Self {
        Self {
            rate_limit: Some((limit, duration)),
            ..self
        }
    }

    /// Sets the SETTINGS_INITIAL_WINDOW_SIZE option for HTTP/2 stream-level flow control.
    ///
    /// Default is 65,535.
    pub fn initial_stream_window_size(self, size: u32) -> Self {
        Self {
            initial_stream_window_size: Some(size),
            ..self
        }
    }

    /// Sets the max connection-level flow control for HTTP/2.
    ///
    /// Default is 65,535.
    pub fn initial_connection_window_size(self, size: u32) -> Self {
        Self {
            initial_connection_window_size: Some(size),
            ..self
        }
    }

    /// Sets the HTTP/2 KEEP_ALIVE_INTERVAL. Uses hyper's default otherwise.
    pub fn http2_keep_alive_interval(self, duration: Duration) -> Self {
        Self {
            http2_keep_alive_interval: Some(duration),
            ..self
        }
    }

    /// Sets the HTTP/2 KEEP_ALIVE_TIMEOUT. Uses hyper's default otherwise.
    pub fn http2_keep_alive_timeout(self, duration: Duration) -> Self {
        Self {
            http2_keep_alive_timeout: Some(duration),
            ..self
        }
    }

    /// Sets HTTP/2 KEEP_ALIVE_WHILE_IDLE. Uses hyper's default otherwise.
    pub fn http2_keep_alive_while_idle(self, enabled: bool) -> Self {
        Self {
            http2_keep_alive_while_idle: Some(enabled),
            ..self
        }
    }

    /// Sets whether to use adaptive flow control. Uses hyper's default otherwise.
    pub fn http2_adaptive_window(self, enabled: bool) -> Self {
        Self {
            http2_adaptive_window: Some(enabled),
            ..self
        }
    }

    /// Sets whether TCP keepalive messages are enabled on accepted connections.
    ///
    /// If `None` is specified, keepalive is disabled; otherwise the specified duration
    /// is the time the connection remains idle before TCP keepalive probes are sent.
    ///
    /// Default is no keepalive (`None`).
    pub fn tcp_keepalive(self, duration: Duration) -> Self {
        Self {
            tcp_keepalive: Some(duration),
            ..self
        }
    }

    /// Sets the value of the TCP_NODELAY option for accepted connections.
    ///
    /// Enabled by default.
    pub fn tcp_nodelay(self, enabled: bool) -> Self {
        Self {
            tcp_nodelay: enabled,
            ..self
        }
    }
}
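For orientation, a minimal usage sketch of the builder above; the values are illustrative assumptions, not recommendations from this change:

// Sketch only: a config a latency-sensitive client might assemble.
let config = ChannelConfig::new()
    .timeout(Duration::from_secs(10))                    // cap total per-request time
    .connect_timeout(Duration::from_secs(3))             // cap dial time separately
    .http2_keep_alive_interval(Duration::from_secs(30)); // probe idle connections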
#[derive(Debug)]
pub struct Channel {
    channel: InnerChannel,
    access: AtomicUsize,
    use_default_connector: bool,
}

impl Channel {
    #[inline]
    pub fn access(&self) -> usize {
        self.access.load(Ordering::Relaxed)
    }

    #[inline]
    pub fn use_default_connector(&self) -> bool {
        self.use_default_connector
    }

    #[inline]
    pub fn increase_access(&self) {
        self.access.fetch_add(1, Ordering::Relaxed);
    }
}

#[derive(Debug, Default)]
struct Pool {
    channels: DashMap<String, Channel>,
}

impl Pool {
    fn get(&self, addr: &str) -> Option<InnerChannel> {
        let channel = self.channels.get(addr);
        channel.map(|ch| {
            ch.increase_access();
            ch.channel.clone()
        })
    }

    fn entry(&self, addr: String) -> Entry<String, Channel> {
        self.channels.entry(addr)
    }

    #[cfg(test)]
    fn get_access(&self, addr: &str) -> Option<usize> {
        let channel = self.channels.get(addr);
        channel.map(|ch| ch.access())
    }

    fn put(&self, addr: &str, channel: Channel) {
        self.channels.insert(addr.to_string(), channel);
    }

    fn retain_channel<F>(&self, f: F)
    where
        F: FnMut(&String, &mut Channel) -> bool,
    {
        self.channels.retain(f);
    }
}
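`Pool::entry` exposes DashMap's entry API so a caller can get-or-create a channel atomically. The real `ChannelManager::get` is not shown in this diff, so the following shape is an assumption:

// Sketch only: fetch an existing channel or build one under the entry lock.
fn get_or_create(pool: &Pool, addr: &str, make: impl FnOnce() -> Channel) -> InnerChannel {
    let entry = pool.entry(addr.to_string());
    let channel = entry.or_insert_with(make); // DashMap's entry API
    channel.increase_access();
    channel.channel.clone()
}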
async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
    let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));

    loop {
        interval.tick().await;
        pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
    }
}
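The recycler drops any channel whose access counter stayed at zero for a whole tick, evicting idle connections. A minimal sketch of wiring it up (the actual spawn site is not part of this diff):

// Sketch only: spawn the recycler next to the pool it watches.
let pool = Arc::new(Pool::default());
tokio::spawn(recycle_channel_in_loop(pool.clone(), 60)); // sweep once a minute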
#[cfg(test)]
mod tests {
    use tower::service_fn;

    use super::*;

    #[should_panic]
    #[test]
    fn test_invalid_addr() {
        let pool = Arc::new(Pool::default());
        let mgr = ChannelManager {
            pool,
            ..Default::default()
        };
        let addr = "http://test";

        let _ = mgr.get(addr).unwrap();
    }

    #[tokio::test]
    async fn test_access_count() {
        let pool = Arc::new(Pool::default());
        let config = ChannelConfig::new();
        let mgr = Arc::new(ChannelManager { pool, config });
        let addr = "test_uri";

        let mut joins = Vec::with_capacity(10);
        for _ in 0..10 {
            let mgr_clone = mgr.clone();
            let join = tokio::spawn(async move {
                for _ in 0..100 {
                    let _ = mgr_clone.get(addr);
                }
            });
            joins.push(join);
        }
        for join in joins {
            join.await.unwrap();
        }

        assert_eq!(1000, mgr.pool.get_access(addr).unwrap());

        mgr.pool
            .retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);

        assert_eq!(0, mgr.pool.get_access(addr).unwrap());
    }

    #[test]
    fn test_config() {
        let default_cfg = ChannelConfig::new();
        assert_eq!(
            ChannelConfig {
                timeout: None,
                connect_timeout: None,
                concurrency_limit: None,
                rate_limit: None,
                initial_stream_window_size: None,
                initial_connection_window_size: None,
                http2_keep_alive_interval: None,
                http2_keep_alive_timeout: None,
                http2_keep_alive_while_idle: None,
                http2_adaptive_window: None,
                tcp_keepalive: None,
                tcp_nodelay: true,
            },
            default_cfg
        );

        let cfg = default_cfg
            .timeout(Duration::from_secs(3))
            .connect_timeout(Duration::from_secs(5))
            .concurrency_limit(6)
            .rate_limit(5, Duration::from_secs(1))
            .initial_stream_window_size(10)
            .initial_connection_window_size(20)
            .http2_keep_alive_interval(Duration::from_secs(1))
            .http2_keep_alive_timeout(Duration::from_secs(3))
            .http2_keep_alive_while_idle(true)
            .http2_adaptive_window(true)
            .tcp_keepalive(Duration::from_secs(2))
            .tcp_nodelay(false);

        assert_eq!(
            ChannelConfig {
                timeout: Some(Duration::from_secs(3)),
                connect_timeout: Some(Duration::from_secs(5)),
                concurrency_limit: Some(6),
                rate_limit: Some((5, Duration::from_secs(1))),
                initial_stream_window_size: Some(10),
                initial_connection_window_size: Some(20),
                http2_keep_alive_interval: Some(Duration::from_secs(1)),
                http2_keep_alive_timeout: Some(Duration::from_secs(3)),
                http2_keep_alive_while_idle: Some(true),
                http2_adaptive_window: Some(true),
                tcp_keepalive: Some(Duration::from_secs(2)),
                tcp_nodelay: false,
            },
            cfg
        );
    }

    #[test]
    fn test_build_endpoint() {
        let pool = Arc::new(Pool::default());
        let config = ChannelConfig::new()
            .timeout(Duration::from_secs(3))
            .connect_timeout(Duration::from_secs(5))
            .concurrency_limit(6)
            .rate_limit(5, Duration::from_secs(1))
            .initial_stream_window_size(10)
            .initial_connection_window_size(20)
            .http2_keep_alive_interval(Duration::from_secs(1))
            .http2_keep_alive_timeout(Duration::from_secs(3))
            .http2_keep_alive_while_idle(true)
            .http2_adaptive_window(true)
            .tcp_keepalive(Duration::from_secs(2))
            .tcp_nodelay(true);
        let mgr = ChannelManager { pool, config };

        let res = mgr.build_endpoint("test_addr");

        assert!(res.is_ok());
    }

    #[tokio::test]
    async fn test_channel_with_connector() {
        let pool = Pool {
            channels: DashMap::default(),
        };

        let pool = Arc::new(pool);

        let config = ChannelConfig::new();
        let mgr = ChannelManager { pool, config };

        let addr = "test_addr";
        let res = mgr.get(addr);
        assert!(res.is_ok());

        mgr.retain_channel(|addr, channel| {
            assert_eq!("test_addr", addr);
            assert!(channel.use_default_connector());
            true
        });

        let (client, _) = tokio::io::duplex(1024);
        let mut client = Some(client);
        let res = mgr.reset_with_connector(
            addr,
            service_fn(move |_| {
                let client = client.take().unwrap();
                async move { Ok::<_, std::io::Error>(client) }
            }),
        );

        assert!(res.is_ok());

        mgr.retain_channel(|addr, channel| {
            assert_eq!("test_addr", addr);
            assert!(!channel.use_default_connector());
            true
        });
    }
}
@@ -49,6 +49,12 @@ pub enum Error {
        actual: String,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to create gRPC channel, source: {}", source))]
    CreateChannel {
        source: tonic::transport::Error,
        backtrace: Backtrace,
    },
}

impl ErrorExt for Error {
@@ -61,9 +67,9 @@ impl ErrorExt for Error {
            Error::UnsupportedDfPlan { .. } | Error::UnsupportedDfExpr { .. } => {
                StatusCode::Unsupported
            }
-           Error::NewProjection { .. } | Error::DecodePhysicalPlanNode { .. } => {
-               StatusCode::Internal
-           }
+           Error::NewProjection { .. }
+           | Error::DecodePhysicalPlanNode { .. }
+           | Error::CreateChannel { .. } => StatusCode::Internal,
        }
    }

@@ -75,3 +81,129 @@ impl ErrorExt for Error {
        self
    }
}

#[cfg(test)]
mod tests {
    use snafu::OptionExt;
    use snafu::ResultExt;

    use super::*;

    type StdResult<E> = std::result::Result<(), E>;

    fn throw_none_option() -> Option<String> {
        None
    }

    #[test]
    fn test_empty_physical_plan_error() {
        let e = throw_none_option()
            .context(EmptyPhysicalPlanSnafu { name: "test" })
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_empty_physical_expr_error() {
        let e = throw_none_option()
            .context(EmptyPhysicalExprSnafu { name: "test" })
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_unsupported_df_plan_error() {
        let e = throw_none_option()
            .context(UnsupportedDfPlanSnafu { name: "test" })
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::Unsupported);
    }

    #[test]
    fn test_unsupported_df_expr_error() {
        let e = throw_none_option()
            .context(UnsupportedDfExprSnafu { name: "test" })
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::Unsupported);
    }

    #[test]
    fn test_missing_field_error() {
        let e = throw_none_option()
            .context(MissingFieldSnafu { field: "test" })
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_new_projection_error() {
        fn throw_df_error() -> StdResult<DataFusionError> {
            Err(DataFusionError::NotImplemented("".to_string()))
        }

        let e = throw_df_error().context(NewProjectionSnafu).err().unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::Internal);
    }

    #[test]
    fn test_decode_physical_plan_node_error() {
        fn throw_decode_error() -> StdResult<DecodeError> {
            Err(DecodeError::new("test"))
        }

        let e = throw_decode_error()
            .context(DecodePhysicalPlanNodeSnafu)
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::Internal);
    }

    #[test]
    fn test_type_mismatch_error() {
        let e = throw_none_option()
            .context(TypeMismatchSnafu {
                column_name: "",
                expected: "",
                actual: "",
            })
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_create_channel_error() {
        fn throw_tonic_error() -> StdResult<tonic::transport::Error> {
            tonic::transport::Endpoint::new("http//http").map(|_| ())
        }

        let e = throw_tonic_error()
            .context(CreateChannelSnafu)
            .err()
            .unwrap();

        assert!(e.backtrace_opt().is_some());
        assert_eq!(e.status_code(), StatusCode::Internal);
    }
}
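The new `CreateChannel` variant pairs with snafu's `context` at the connect site; a minimal sketch, assuming a connect helper of roughly this shape (not part of the diff):

use snafu::ResultExt;

// Sketch only: convert the tonic transport error and capture a backtrace.
async fn connect(
    endpoint: tonic::transport::Endpoint,
) -> std::result::Result<tonic::transport::Channel, Error> {
    endpoint.connect().await.context(CreateChannelSnafu)
}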
@@ -1,3 +1,4 @@
+pub mod channel_manager;
pub mod error;
pub mod physical;
pub mod writer;
@@ -184,7 +184,7 @@ impl ExecutionPlan for MockExecution {
        _runtime: Arc<RuntimeEnv>,
    ) -> datafusion::error::Result<SendableRecordBatchStream> {
        let id_array = Arc::new(PrimitiveArray::from_slice([1u32, 2, 3, 4, 5]));
-       let name_array = Arc::new(Utf8Array::<i64>::from_slice([
+       let name_array = Arc::new(Utf8Array::<i32>::from_slice([
            "zhangsan", "lisi", "wangwu", "Tony", "Mike",
        ]));
        let age_array = Arc::new(PrimitiveArray::from_slice([25u32, 28, 27, 35, 25]));
@@ -35,7 +35,7 @@ impl LinesWriter {
            SemanticType::Timestamp,
        );
        ensure!(
-           column.datatype == Some(ColumnDataType::Timestamp.into()),
+           column.datatype == ColumnDataType::Timestamp as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "timestamp",
@@ -52,7 +52,7 @@ impl LinesWriter {
    pub fn write_tag(&mut self, column_name: &str, value: &str) -> Result<()> {
        let (idx, column) = self.mut_column(column_name, ColumnDataType::String, SemanticType::Tag);
        ensure!(
-           column.datatype == Some(ColumnDataType::String.into()),
+           column.datatype == ColumnDataType::String as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "string",
@@ -70,7 +70,7 @@ impl LinesWriter {
        let (idx, column) =
            self.mut_column(column_name, ColumnDataType::Uint64, SemanticType::Field);
        ensure!(
-           column.datatype == Some(ColumnDataType::Uint64.into()),
+           column.datatype == ColumnDataType::Uint64 as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "u64",
@@ -88,7 +88,7 @@ impl LinesWriter {
        let (idx, column) =
            self.mut_column(column_name, ColumnDataType::Int64, SemanticType::Field);
        ensure!(
-           column.datatype == Some(ColumnDataType::Int64.into()),
+           column.datatype == ColumnDataType::Int64 as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "i64",
@@ -106,7 +106,7 @@ impl LinesWriter {
        let (idx, column) =
            self.mut_column(column_name, ColumnDataType::Float64, SemanticType::Field);
        ensure!(
-           column.datatype == Some(ColumnDataType::Float64.into()),
+           column.datatype == ColumnDataType::Float64 as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "f64",
@@ -124,7 +124,7 @@ impl LinesWriter {
        let (idx, column) =
            self.mut_column(column_name, ColumnDataType::String, SemanticType::Field);
        ensure!(
-           column.datatype == Some(ColumnDataType::String.into()),
+           column.datatype == ColumnDataType::String as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "string",
@@ -142,7 +142,7 @@ impl LinesWriter {
        let (idx, column) =
            self.mut_column(column_name, ColumnDataType::Boolean, SemanticType::Field);
        ensure!(
-           column.datatype == Some(ColumnDataType::Boolean.into()),
+           column.datatype == ColumnDataType::Boolean as i32,
            TypeMismatchSnafu {
                column_name,
                expected: "boolean",
@@ -197,7 +197,7 @@ impl LinesWriter {
            column_name: column_name.to_string(),
            semantic_type: semantic_type.into(),
            values: Some(Values::with_capacity(datatype, to_insert)),
-           datatype: Some(datatype.into()),
+           datatype: datatype as i32,
            null_mask: Vec::default(),
        });
        column_names.insert(column_name.to_string(), new_idx);
@@ -275,7 +275,7 @@ mod tests {

        let column = &columns[0];
        assert_eq!("host", columns[0].column_name);
-       assert_eq!(Some(ColumnDataType::String as i32), column.datatype);
+       assert_eq!(ColumnDataType::String as i32, column.datatype);
        assert_eq!(SemanticType::Tag as i32, column.semantic_type);
        assert_eq!(
            vec!["host1", "host2", "host3"],
@@ -285,28 +285,28 @@ mod tests {

        let column = &columns[1];
        assert_eq!("cpu", column.column_name);
-       assert_eq!(Some(ColumnDataType::Float64 as i32), column.datatype);
+       assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec![0.5, 0.4], column.values.as_ref().unwrap().f64_values);
        verify_null_mask(&column.null_mask, vec![false, true, false]);

        let column = &columns[2];
        assert_eq!("memory", column.column_name);
-       assert_eq!(Some(ColumnDataType::Float64 as i32), column.datatype);
+       assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec![0.4], column.values.as_ref().unwrap().f64_values);
        verify_null_mask(&column.null_mask, vec![false, true, true]);

        let column = &columns[3];
        assert_eq!("name", column.column_name);
-       assert_eq!(Some(ColumnDataType::String as i32), column.datatype);
+       assert_eq!(ColumnDataType::String as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec!["name1"], column.values.as_ref().unwrap().string_values);
        verify_null_mask(&column.null_mask, vec![false, true, true]);

        let column = &columns[4];
        assert_eq!("ts", column.column_name);
-       assert_eq!(Some(ColumnDataType::Timestamp as i32), column.datatype);
+       assert_eq!(ColumnDataType::Timestamp as i32, column.datatype);
        assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
        assert_eq!(
            vec![101011000, 102011001, 103011002],
@@ -316,28 +316,28 @@ mod tests {

        let column = &columns[5];
        assert_eq!("enable_reboot", column.column_name);
-       assert_eq!(Some(ColumnDataType::Boolean as i32), column.datatype);
+       assert_eq!(ColumnDataType::Boolean as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec![true], column.values.as_ref().unwrap().bool_values);
        verify_null_mask(&column.null_mask, vec![true, false, true]);

        let column = &columns[6];
        assert_eq!("year_of_service", column.column_name);
-       assert_eq!(Some(ColumnDataType::Uint64 as i32), column.datatype);
+       assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec![2], column.values.as_ref().unwrap().u64_values);
        verify_null_mask(&column.null_mask, vec![true, false, true]);

        let column = &columns[7];
        assert_eq!("temperature", column.column_name);
-       assert_eq!(Some(ColumnDataType::Int64 as i32), column.datatype);
+       assert_eq!(ColumnDataType::Int64 as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec![4], column.values.as_ref().unwrap().i64_values);
        verify_null_mask(&column.null_mask, vec![true, false, true]);

        let column = &columns[8];
        assert_eq!("cpu_core_num", column.column_name);
-       assert_eq!(Some(ColumnDataType::Uint64 as i32), column.datatype);
+       assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
        assert_eq!(SemanticType::Field as i32, column.semantic_type);
        assert_eq!(vec![16], column.values.as_ref().unwrap().u64_values);
        verify_null_mask(&column.null_mask, vec![true, true, false]);
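These tests exercise a writer built earlier in the file; a compressed sketch of the call pattern they imply. Only `write_tag`'s signature appears in this diff, so the constructor and field-writer names below are assumptions:

// Sketch only; `with_lines` and `write_f64` are assumed names.
let mut writer = LinesWriter::with_lines(3); // hypothetical constructor
writer.write_tag("host", "host1")?;          // signature confirmed above
writer.write_f64("cpu", 0.5)?;               // name assumed; its body appears in this diff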
@@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"

[dependencies]
async-trait = "0.1"
common-error = { path = "../error" }
common-recordbatch = { path = "../recordbatch" }
common-time = { path = "../time" }
@@ -4,6 +4,7 @@ use arrow::datatypes::DataType as ArrowDatatype;
use common_error::prelude::*;
use datafusion_common::DataFusionError;
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType;
use statrs::StatsError;

common_error::define_opaque_error!(Error);
@@ -17,6 +18,13 @@ pub enum InnerError {
        backtrace: Backtrace,
    },

    #[snafu(display("Unsupported input datatypes {:?} in function {}", datatypes, function))]
    UnsupportedInputDataType {
        function: String,
        datatypes: Vec<ConcreteDataType>,
        backtrace: Backtrace,
    },

    #[snafu(display("Fail to generate function, source: {}", source))]
    GenerateFunction {
        source: StatsError,
@@ -62,6 +70,36 @@ pub enum InnerError {

    #[snafu(display("unexpected: not constant column"))]
    InvalidInputCol { backtrace: Backtrace },

    #[snafu(display("Not expected to run ExecutionPlan more than once"))]
    ExecuteRepeatedly { backtrace: Backtrace },

    #[snafu(display("General DataFusion error, source: {}", source))]
    GeneralDataFusion {
        source: DataFusionError,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to execute DataFusion ExecutionPlan, source: {}", source))]
    DataFusionExecutionPlan {
        source: DataFusionError,
        backtrace: Backtrace,
    },

    #[snafu(display(
        "Failed to convert DataFusion's recordbatch stream, source: {}",
        source
    ))]
    ConvertDfRecordBatchStream {
        #[snafu(backtrace)]
        source: common_recordbatch::error::Error,
    },

    #[snafu(display("Failed to convert arrow schema, source: {}", source))]
    ConvertArrowSchema {
        #[snafu(backtrace)]
        source: DataTypeError,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -76,9 +114,19 @@ impl ErrorExt for InnerError {
            | InnerError::InvalidInputState { .. }
            | InnerError::InvalidInputCol { .. }
            | InnerError::BadAccumulatorImpl { .. } => StatusCode::EngineExecuteQuery,
-           InnerError::InvalidInputs { source, .. } => source.status_code(),
-           InnerError::IntoVector { source, .. } => source.status_code(),
-           InnerError::FromScalarValue { source } => source.status_code(),

+           InnerError::InvalidInputs { source, .. }
+           | InnerError::IntoVector { source, .. }
+           | InnerError::FromScalarValue { source }
+           | InnerError::ConvertArrowSchema { source } => source.status_code(),

+           InnerError::ExecuteRepeatedly { .. }
+           | InnerError::GeneralDataFusion { .. }
+           | InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,

+           InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,

+           InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
        }
    }
@@ -105,6 +153,7 @@ impl From<Error> for DataFusionError {

#[cfg(test)]
mod tests {
    use arrow::error::ArrowError;
    use snafu::GenerateImplicitData;

    use super::*;
@@ -127,6 +176,48 @@ mod tests {
            .unwrap()
            .into();
        assert_error(&err, StatusCode::EngineExecuteQuery);

        let err: Error = throw_df_error()
            .context(GeneralDataFusionSnafu)
            .err()
            .unwrap()
            .into();
        assert_error(&err, StatusCode::Unexpected);

        let err: Error = throw_df_error()
            .context(DataFusionExecutionPlanSnafu)
            .err()
            .unwrap()
            .into();
        assert_error(&err, StatusCode::Unexpected);
    }

    #[test]
    fn test_execute_repeatedly_error() {
        let error: Error = None::<i32>
            .context(ExecuteRepeatedlySnafu)
            .err()
            .unwrap()
            .into();
        assert_eq!(error.inner.status_code(), StatusCode::Unexpected);
        assert!(error.backtrace_opt().is_some());
    }

    #[test]
    fn test_convert_df_recordbatch_stream_error() {
        let result: std::result::Result<i32, common_recordbatch::error::Error> =
            Err(common_recordbatch::error::InnerError::PollStream {
                source: ArrowError::Overflow,
                backtrace: Backtrace::generate(),
            }
            .into());
        let error: Error = result
            .context(ConvertDfRecordBatchStreamSnafu)
            .err()
            .unwrap()
            .into();
        assert_eq!(error.inner.status_code(), StatusCode::Internal);
        assert!(error.backtrace_opt().is_some());
    }

    fn raise_datatype_error() -> std::result::Result<(), DataTypeError> {
@@ -4,6 +4,7 @@ pub mod columnar_value;
pub mod error;
mod function;
pub mod logical_plan;
+pub mod physical_plan;
pub mod prelude;
mod signature;

@@ -13,3 +14,5 @@ pub enum Output {
    RecordBatches(RecordBatches),
    Stream(SendableRecordBatchStream),
}

+pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;
@@ -194,7 +194,6 @@ fn try_into_scalar_value(value: Value, datatype: &ConcreteDataType) -> Result<Sc
        Value::Null => try_convert_null_value(datatype)?,
        Value::List(list) => try_convert_list_value(list)?,
        Value::Timestamp(t) => timestamp_to_scalar_value(t.unit(), Some(t.value())),
-       Value::Geometry(_) => todo!(),
    })
}

@@ -2,7 +2,7 @@ use datafusion::logical_plan::Expr as DfExpr;

/// Central struct of the query API.
/// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
-#[derive(Clone, PartialEq, Hash)]
+#[derive(Clone, PartialEq, Hash, Debug)]
pub struct Expr {
    df_expr: DfExpr,
}

src/common/query/src/physical_plan.rs (new file, 325 lines)
@@ -0,0 +1,325 @@
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;

use async_trait::async_trait;
use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
use common_recordbatch::DfSendableRecordBatchStream;
use common_recordbatch::SendableRecordBatchStream;
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
pub use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::physical_plan::expressions::PhysicalSortExpr;
pub use datafusion::physical_plan::Partitioning;
use datafusion::physical_plan::Statistics;
use datatypes::schema::SchemaRef;
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::DfPhysicalPlan;

pub type PhysicalPlanRef = Arc<dyn PhysicalPlan>;

/// `PhysicalPlan` represents a node in the physical plan.
///
/// Each `PhysicalPlan` is partition-aware and is responsible for
/// creating the actual `async` [`SendableRecordBatchStream`]s
/// of [`RecordBatch`] that incrementally compute the operator's
/// output from its input partition.
#[async_trait]
pub trait PhysicalPlan: Debug + Send + Sync {
    /// Returns the physical plan as [`Any`](std::any::Any) so that it can be
    /// downcast to a specific implementation.
    fn as_any(&self) -> &dyn Any;

    /// Gets the schema for this physical plan.
    fn schema(&self) -> SchemaRef;

    /// Specifies the output partitioning scheme of this plan.
    fn output_partitioning(&self) -> Partitioning;

    /// Gets a list of child physical plans that provide the input for this plan. The returned list
    /// is empty for leaf nodes, contains a single value for unary nodes, and two
    /// values for binary nodes (such as joins).
    fn children(&self) -> Vec<PhysicalPlanRef>;

    /// Returns a new plan where all children were replaced by new plans.
    /// The size of `children` must be equal to the size of `PhysicalPlan::children()`.
    fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef>;

    /// Creates a RecordBatch stream.
    async fn execute(
        &self,
        partition: usize,
        runtime: Arc<RuntimeEnv>,
    ) -> Result<SendableRecordBatchStream>;
}

#[derive(Debug)]
pub struct PhysicalPlanAdapter {
    schema: SchemaRef,
    df_plan: Arc<dyn DfPhysicalPlan>,
}

impl PhysicalPlanAdapter {
    pub fn new(schema: SchemaRef, df_plan: Arc<dyn DfPhysicalPlan>) -> Self {
        Self { schema, df_plan }
    }

    pub fn df_plan(&self) -> Arc<dyn DfPhysicalPlan> {
        self.df_plan.clone()
    }
}

#[async_trait]
impl PhysicalPlan for PhysicalPlanAdapter {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    fn output_partitioning(&self) -> Partitioning {
        self.df_plan.output_partitioning()
    }

    fn children(&self) -> Vec<PhysicalPlanRef> {
        self.df_plan
            .children()
            .into_iter()
            .map(|x| Arc::new(PhysicalPlanAdapter::new(self.schema(), x)) as _)
            .collect()
    }

    fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef> {
        let children = children
            .into_iter()
            .map(|x| Arc::new(DfPhysicalPlanAdapter(x)) as _)
            .collect();
        let plan = self
            .df_plan
            .with_new_children(children)
            .context(error::GeneralDataFusionSnafu)?;
        Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
    }

    async fn execute(
        &self,
        partition: usize,
        runtime: Arc<RuntimeEnv>,
    ) -> Result<SendableRecordBatchStream> {
        let stream = self
            .df_plan
            .execute(partition, runtime)
            .await
            .context(error::DataFusionExecutionPlanSnafu)?;
        let stream = RecordBatchStreamAdapter::try_new(stream)
            .context(error::ConvertDfRecordBatchStreamSnafu)?;
        Ok(Box::pin(stream))
    }
}

#[derive(Debug)]
pub struct DfPhysicalPlanAdapter(pub PhysicalPlanRef);

#[async_trait]
impl DfPhysicalPlan for DfPhysicalPlanAdapter {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn schema(&self) -> DfSchemaRef {
        self.0.schema().arrow_schema().clone()
    }

    fn output_partitioning(&self) -> Partitioning {
        self.0.output_partitioning()
    }

    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
        None
    }

    fn children(&self) -> Vec<Arc<dyn DfPhysicalPlan>> {
        self.0
            .children()
            .into_iter()
            .map(|x| Arc::new(DfPhysicalPlanAdapter(x)) as _)
            .collect()
    }

    fn with_new_children(
        &self,
        children: Vec<Arc<dyn DfPhysicalPlan>>,
    ) -> DfResult<Arc<dyn DfPhysicalPlan>> {
        let df_schema = self.schema();
        let schema: SchemaRef = Arc::new(
            df_schema
                .try_into()
                .context(error::ConvertArrowSchemaSnafu)
                .map_err(error::Error::from)?,
        );
        let children = children
            .into_iter()
            .map(|x| Arc::new(PhysicalPlanAdapter::new(schema.clone(), x)) as _)
            .collect();
        let plan = self.0.with_new_children(children)?;
        Ok(Arc::new(DfPhysicalPlanAdapter(plan)))
    }

    async fn execute(
        &self,
        partition: usize,
        runtime: Arc<RuntimeEnv>,
    ) -> DfResult<DfSendableRecordBatchStream> {
        let stream = self.0.execute(partition, runtime).await?;
        Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
    }

    fn statistics(&self) -> Statistics {
        // TODO(LFC): impl statistics
        Statistics::default()
    }
}

#[cfg(test)]
mod test {
    use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
    use common_recordbatch::{RecordBatch, RecordBatches};
    use datafusion::arrow_print;
    use datafusion::datasource::TableProvider as DfTableProvider;
    use datafusion::logical_plan::LogicalPlanBuilder;
    use datafusion::physical_plan::collect;
    use datafusion::physical_plan::empty::EmptyExec;
    use datafusion::prelude::ExecutionContext;
    use datafusion_common::field_util::SchemaExt;
    use datafusion_expr::Expr;
    use datatypes::schema::Schema;
    use datatypes::vectors::Int32Vector;

    use super::*;

    struct MyDfTableProvider;

    #[async_trait]
    impl DfTableProvider for MyDfTableProvider {
        fn as_any(&self) -> &dyn Any {
            self
        }

        fn schema(&self) -> DfSchemaRef {
            Arc::new(ArrowSchema::new(vec![Field::new(
                "a",
                DataType::Int32,
                false,
            )]))
        }

        async fn scan(
            &self,
            _projection: &Option<Vec<usize>>,
            _filters: &[Expr],
            _limit: Option<usize>,
        ) -> DfResult<Arc<dyn DfPhysicalPlan>> {
            let schema = Schema::try_from(self.schema()).unwrap();
            let my_plan = Arc::new(MyExecutionPlan {
                schema: Arc::new(schema),
            });
            let df_plan = DfPhysicalPlanAdapter(my_plan);
            Ok(Arc::new(df_plan))
        }
    }

    #[derive(Debug)]
    struct MyExecutionPlan {
        schema: SchemaRef,
    }

    #[async_trait]
    impl PhysicalPlan for MyExecutionPlan {
        fn as_any(&self) -> &dyn Any {
            self
        }

        fn schema(&self) -> SchemaRef {
            self.schema.clone()
        }

        fn output_partitioning(&self) -> Partitioning {
            Partitioning::UnknownPartitioning(1)
        }

        fn children(&self) -> Vec<PhysicalPlanRef> {
            vec![]
        }

        fn with_new_children(&self, _children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef> {
            unimplemented!()
        }

        async fn execute(
            &self,
            _partition: usize,
            _runtime: Arc<RuntimeEnv>,
        ) -> Result<SendableRecordBatchStream> {
            let schema = self.schema();
            let recordbatches = RecordBatches::try_new(
                schema.clone(),
                vec![
                    RecordBatch::new(
                        schema.clone(),
                        vec![Arc::new(Int32Vector::from_slice(vec![1])) as _],
                    )
                    .unwrap(),
                    RecordBatch::new(
                        schema,
                        vec![Arc::new(Int32Vector::from_slice(vec![2, 3])) as _],
                    )
                    .unwrap(),
                ],
            )
            .unwrap();
            Ok(recordbatches.as_stream())
        }
    }

    // Tests that our physical plan can be executed by DataFusion, through the adapters.
    #[tokio::test]
    async fn test_execute_physical_plan() {
        let ctx = ExecutionContext::new();
        let logical_plan = LogicalPlanBuilder::scan("test", Arc::new(MyDfTableProvider), None)
            .unwrap()
            .build()
            .unwrap();
        let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
        let df_recordbatches = collect(physical_plan, Arc::new(RuntimeEnv::default()))
            .await
            .unwrap();
        let pretty_print = arrow_print::write(&df_recordbatches);
        let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
        assert_eq!(
            pretty_print,
            vec!["+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+",]
        );
    }

    #[test]
    fn test_physical_plan_adapter() {
        let df_schema = Arc::new(ArrowSchema::new(vec![Field::new(
            "name",
            DataType::Utf8,
            true,
        )]));

        let plan = PhysicalPlanAdapter::new(
            Arc::new(Schema::try_from(df_schema.clone()).unwrap()),
            Arc::new(EmptyExec::new(true, df_schema.clone())),
        );
        assert!(plan.df_plan.as_any().downcast_ref::<EmptyExec>().is_some());

        let df_plan = DfPhysicalPlanAdapter(Arc::new(plan));
        assert_eq!(df_schema, df_plan.schema());
    }
}
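In short, the two adapters make the plan types interchangeable in both directions; a minimal sketch, assuming `schema` and `df_plan` are already in scope:

// Sketch only: view a DataFusion plan as a Greptime plan, then wrap it back.
let plan: PhysicalPlanRef = Arc::new(PhysicalPlanAdapter::new(schema, df_plan));
let df_plan_again: Arc<dyn DfPhysicalPlan> = Arc::new(DfPhysicalPlanAdapter(plan));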
src/common/recordbatch/src/adapter.rs (new file, 92 lines)
@@ -0,0 +1,92 @@
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};

use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::error::ArrowError;
use datatypes::arrow::error::Result as ArrowResult;
use datatypes::schema::{Schema, SchemaRef};
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::DfSendableRecordBatchStream;
use crate::{RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream};

/// Greptime SendableRecordBatchStream -> DataFusion RecordBatchStream
pub struct DfRecordBatchStreamAdapter {
    stream: SendableRecordBatchStream,
}

impl DfRecordBatchStreamAdapter {
    pub fn new(stream: SendableRecordBatchStream) -> Self {
        Self { stream }
    }
}

impl DfRecordBatchStream for DfRecordBatchStreamAdapter {
    fn schema(&self) -> DfSchemaRef {
        self.stream.schema().arrow_schema().clone()
    }
}

impl Stream for DfRecordBatchStreamAdapter {
    type Item = ArrowResult<DfRecordBatch>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        match Pin::new(&mut self.stream).poll_next(cx) {
            Poll::Pending => Poll::Pending,
            Poll::Ready(Some(recordbatch)) => match recordbatch {
                Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.df_recordbatch))),
                Err(e) => Poll::Ready(Some(Err(ArrowError::External("".to_owned(), Box::new(e))))),
            },
            Poll::Ready(None) => Poll::Ready(None),
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.stream.size_hint()
    }
}

/// DataFusion SendableRecordBatchStream -> Greptime RecordBatchStream
pub struct RecordBatchStreamAdapter {
    schema: SchemaRef,
    stream: DfSendableRecordBatchStream,
}

impl RecordBatchStreamAdapter {
    pub fn try_new(stream: DfSendableRecordBatchStream) -> Result<Self> {
        let schema =
            Arc::new(Schema::try_from(stream.schema()).context(error::SchemaConversionSnafu)?);
        Ok(Self { schema, stream })
    }
}

impl RecordBatchStream for RecordBatchStreamAdapter {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }
}

impl Stream for RecordBatchStreamAdapter {
    type Item = Result<RecordBatch>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        match Pin::new(&mut self.stream).poll_next(cx) {
            Poll::Pending => Poll::Pending,
            Poll::Ready(Some(df_recordbatch)) => Poll::Ready(Some(Ok(RecordBatch {
                schema: self.schema(),
                df_recordbatch: df_recordbatch.context(error::PollStreamSnafu)?,
            }))),
            Poll::Ready(None) => Poll::Ready(None),
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.stream.size_hint()
    }
}
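A minimal sketch of the conversions this file enables; only the DataFusion-to-Greptime direction can fail, on schema conversion:

// Sketch only: helpers a caller might write on top of the adapters.
fn to_df(stream: SendableRecordBatchStream) -> DfSendableRecordBatchStream {
    Box::pin(DfRecordBatchStreamAdapter::new(stream))
}

fn from_df(stream: DfSendableRecordBatchStream) -> Result<SendableRecordBatchStream> {
    Ok(Box::pin(RecordBatchStreamAdapter::try_new(stream)?))
}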
@@ -33,16 +33,32 @@ pub enum InnerError {
        reason: String,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to convert Arrow schema, source: {}", source))]
    SchemaConversion {
        source: datatypes::error::Error,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to poll stream, source: {}", source))]
    PollStream {
        source: datatypes::arrow::error::ArrowError,
        backtrace: Backtrace,
    },
}

impl ErrorExt for InnerError {
    fn status_code(&self) -> StatusCode {
        match self {
            InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
-           InnerError::DataTypes { .. } | InnerError::CreateRecordBatches { .. } => {
-               StatusCode::Internal
-           }

+           InnerError::DataTypes { .. }
+           | InnerError::CreateRecordBatches { .. }
+           | InnerError::PollStream { .. } => StatusCode::Internal,

            InnerError::External { source } => source.status_code(),

+           InnerError::SchemaConversion { source, .. } => source.status_code(),
        }
    }

@@ -1,10 +1,15 @@
+pub mod adapter;
pub mod error;
mod recordbatch;
pub mod util;

use std::pin::Pin;
use std::sync::Arc;

-use datatypes::schema::SchemaRef;
+use datafusion::arrow_print;
+pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
+use datatypes::prelude::VectorRef;
+use datatypes::schema::{Schema, SchemaRef};
use error::Result;
use futures::task::{Context, Poll};
use futures::Stream;
@@ -52,6 +57,35 @@ pub struct RecordBatches {
}

impl RecordBatches {
    pub fn try_from_columns<I: IntoIterator<Item = VectorRef>>(
        schema: SchemaRef,
        columns: I,
    ) -> Result<Self> {
        let batches = vec![RecordBatch::new(schema.clone(), columns)?];
        Ok(Self { schema, batches })
    }

    #[inline]
    pub fn empty() -> Self {
        Self {
            schema: Arc::new(Schema::new(vec![])),
            batches: vec![],
        }
    }

    pub fn iter(&self) -> impl Iterator<Item = &RecordBatch> {
        self.batches.iter()
    }

    pub fn pretty_print(&self) -> String {
        arrow_print::write(
            &self
                .iter()
                .map(|x| x.df_recordbatch.clone())
                .collect::<Vec<_>>(),
        )
    }

    pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
        for batch in batches.iter() {
            ensure!(
@@ -74,6 +108,41 @@ impl RecordBatches {
    pub fn take(self) -> Vec<RecordBatch> {
        self.batches
    }

    pub fn as_stream(&self) -> SendableRecordBatchStream {
        Box::pin(SimpleRecordBatchStream {
            inner: RecordBatches {
                schema: self.schema(),
                batches: self.batches.clone(),
            },
            index: 0,
        })
    }
}

pub struct SimpleRecordBatchStream {
    inner: RecordBatches,
    index: usize,
}

impl RecordBatchStream for SimpleRecordBatchStream {
    fn schema(&self) -> SchemaRef {
        self.inner.schema()
    }
}

impl Stream for SimpleRecordBatchStream {
    type Item = Result<RecordBatch>;

    fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        Poll::Ready(if self.index < self.inner.batches.len() {
            let batch = self.inner.batches[self.index].clone();
            self.index += 1;
            Some(Ok(batch))
        } else {
            None
        })
    }
}
#[cfg(test)]
@@ -87,7 +156,26 @@ mod tests {
    use super::*;

    #[test]
-   fn test_recordbatches() {
+   fn test_recordbatches_try_from_columns() {
        let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
            "a",
            ConcreteDataType::int32_datatype(),
            false,
        )]));
        let result = RecordBatches::try_from_columns(
            schema.clone(),
            vec![Arc::new(StringVector::from(vec!["hello", "world"])) as _],
        );
        assert!(result.is_err());

        let v: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
        let expected = vec![RecordBatch::new(schema.clone(), vec![v.clone()]).unwrap()];
        let r = RecordBatches::try_from_columns(schema, vec![v]).unwrap();
        assert_eq!(r.take(), expected);
    }

    #[test]
    fn test_recordbatches_try_new() {
        let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
        let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
        let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
@@ -113,7 +201,39 @@ mod tests {
        );

        let batches = RecordBatches::try_new(schema1.clone(), vec![batch1.clone()]).unwrap();
        let expected = "\
+---+-------+
| a | b     |
+---+-------+
| 1 | hello |
| 2 | world |
+---+-------+";
        assert_eq!(batches.pretty_print(), expected);

        assert_eq!(schema1, batches.schema());
        assert_eq!(vec![batch1], batches.take());
    }

    #[tokio::test]
    async fn test_simple_recordbatch_stream() {
        let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
        let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
        let schema = Arc::new(Schema::new(vec![column_a, column_b]));

        let va1: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
        let vb1: VectorRef = Arc::new(StringVector::from(vec!["a", "b"]));
        let batch1 = RecordBatch::new(schema.clone(), vec![va1, vb1]).unwrap();

        let va2: VectorRef = Arc::new(Int32Vector::from_slice(&[3, 4, 5]));
        let vb2: VectorRef = Arc::new(StringVector::from(vec!["c", "d", "e"]));
        let batch2 = RecordBatch::new(schema.clone(), vec![va2, vb2]).unwrap();

        let recordbatches =
            RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
        let stream = recordbatches.as_stream();
        let collected = util::collect(stream).await.unwrap();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0], batch1);
        assert_eq!(collected[1], batch2);
    }
}
@@ -31,6 +31,10 @@ impl RecordBatch {
        })
    }

    pub fn num_rows(&self) -> usize {
        self.df_recordbatch.num_rows()
    }

    /// Creates an iterator to traverse the data by row.
    pub fn rows(&self) -> RecordBatchRowIterator<'_> {
        RecordBatchRowIterator::new(self)
src/common/substrait/Cargo.toml (new file, 27 lines)
@@ -0,0 +1,27 @@
[package]
name = "substrait"
version = "0.1.0"
edition = "2021"

[dependencies]
bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
    "simd",
] }
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost = "0.9"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }

[dependencies.substrait_proto]
package = "substrait"
version = "0.2"

[dev-dependencies]
datatypes = { path = "../../datatypes" }
table = { path = "../../table" }
tokio = { version = "1.0", features = ["full"] }
Some files were not shown because too many files have changed in this diff.