Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-24 15:09:59 +00:00

Compare commits

109 Commits
| SHA1 |
|---|
| f7d2ede4a4 |
| df6ebbf934 |
| 719fbf2f3a |
| cc14dea913 |
| cc7c313937 |
| a6e41cdd7b |
| a5771e2ec3 |
| 68e64a6ce9 |
| 90cd3bb5c9 |
| bea37e30d8 |
| d988b43996 |
| 0fc816fb0c |
| 43391e0162 |
| 3e7f7e3e8d |
| 0819582a26 |
| 9fa871a3fa |
| 76640402ba |
| c20dbda598 |
| 33dbf7264f |
| 716bde8f04 |
| 9f2825495d |
| ae21c1c1e9 |
| 6b6617f9cb |
| d5f0ba4ad9 |
| e021da2eee |
| fac9c17a9b |
| dfc2a45de1 |
| 3e8ec8b73a |
| a90798a2c1 |
| f5cf5685cc |
| 1a21a6ea41 |
| 09f003d01d |
| 29c6155ae3 |
| 804348966d |
| b7bdee6de9 |
| c850e9695a |
| a3e47955b8 |
| 554a69ea54 |
| f8b6a6b219 |
| dce0adfc7e |
| da66138e80 |
| d10de46e03 |
| 59f7630000 |
| a6932c6a08 |
| 10593a5adb |
| bf8c717022 |
| aa9f6c344c |
| 99353c6ce7 |
| a2d8804129 |
| 637a4a2a58 |
| ef134479ef |
| 451f9d2d4e |
| 68d3247791 |
| 2458b4edd5 |
| 5848f27c27 |
| 215cea151f |
| a82f1f564d |
| 48c2841e4d |
| d2542552d3 |
| c0132e6cc0 |
| aea932b891 |
| 0253136333 |
| 6a05f617a4 |
| a2b262ebc0 |
| 972f64c3d7 |
| eb77f9aafd |
| dee20144d7 |
| 563adbabe9 |
| b71bb4e5fa |
| fae293310c |
| 3e51640442 |
| b40193d7da |
| b5e5f8e555 |
| 192fa0caa5 |
| 30eb676d6a |
| d7cadf6e6d |
| d7a1435517 |
| 0943079de2 |
| 509d07b798 |
| e72ce5eaa9 |
| f491a040f5 |
| 47179a7812 |
| 995a28a27d |
| ed1cb73ffc |
| 0ffa628c22 |
| 5edd2a3dbe |
| e63b28bff1 |
| 8140d4e3e5 |
| 6825459c75 |
| 7eb4d81929 |
| 8ba0741c81 |
| 0eeb5b460c |
| 65ea6fd85f |
| 4f15b26b28 |
| 15ee4ac729 |
| b4fc8c5b78 |
| 6f81717866 |
| 77f9383daf |
| c788b7fc26 |
| 0f160a73be |
| 92963b9614 |
| f1139fba59 |
| 4e552245b1 |
| 3126bbc1c7 |
| b77b561bc8 |
| 501faad8ab |
| 5397a9bbe6 |
| f351ee7042 |
| e0493e0b8f |
@@ -3,3 +3,13 @@ linker = "aarch64-linux-gnu-gcc"
+[alias]
+sqlness = "run --bin sqlness-runner --"
+
+[build]
+rustflags = [
+  # lints
+  # TODO: use lint configuration in cargo https://github.com/rust-lang/cargo/issues/5034
+  "-Wclippy::print_stdout",
+  "-Wclippy::print_stderr",
+  "-Wclippy::implicit_clone",
+]
2  .github/workflows/develop.yml  vendored
@@ -183,7 +183,7 @@ jobs:
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2
       - name: Run cargo clippy
-        run: cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
+        run: cargo clippy --workspace --all-targets -- -D warnings

   coverage:
     if: github.event.pull_request.draft == false
79  .github/workflows/release.yml  vendored
@@ -5,6 +5,7 @@ on:
   schedule:
     # At 00:00 on Monday.
     - cron: '0 0 * * 1'
+  # Manually trigger only builds binaries.
   workflow_dispatch:

 name: Release
@@ -32,38 +33,42 @@ jobs:
           os: ubuntu-2004-16-cores
           file: greptime-linux-amd64
           continue-on-error: false
+          opts: "-F servers/dashboard"
         - arch: aarch64-unknown-linux-gnu
           os: ubuntu-2004-16-cores
           file: greptime-linux-arm64
           continue-on-error: false
+          opts: "-F servers/dashboard"
         - arch: aarch64-apple-darwin
           os: macos-latest
           file: greptime-darwin-arm64
           continue-on-error: false
+          opts: "-F servers/dashboard"
         - arch: x86_64-apple-darwin
           os: macos-latest
           file: greptime-darwin-amd64
           continue-on-error: false
+          opts: "-F servers/dashboard"
         - arch: x86_64-unknown-linux-gnu
           os: ubuntu-2004-16-cores
           file: greptime-linux-amd64-pyo3
           continue-on-error: false
-          opts: "-F pyo3_backend"
+          opts: "-F pyo3_backend,servers/dashboard"
         - arch: aarch64-unknown-linux-gnu
           os: ubuntu-2004-16-cores
           file: greptime-linux-arm64-pyo3
           continue-on-error: false
-          opts: "-F pyo3_backend"
+          opts: "-F pyo3_backend,servers/dashboard"
         - arch: aarch64-apple-darwin
           os: macos-latest
           file: greptime-darwin-arm64-pyo3
           continue-on-error: false
-          opts: "-F pyo3_backend"
+          opts: "-F pyo3_backend,servers/dashboard"
         - arch: x86_64-apple-darwin
           os: macos-latest
           file: greptime-darwin-amd64-pyo3
           continue-on-error: false
-          opts: "-F pyo3_backend"
+          opts: "-F pyo3_backend,servers/dashboard"
     runs-on: ${{ matrix.os }}
     continue-on-error: ${{ matrix.continue-on-error }}
     if: github.repository == 'GreptimeTeam/greptimedb'
@@ -164,7 +169,7 @@ jobs:
           export LD_LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LD_LIBRARY_PATH
           export LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LIBRARY_PATH
           export PATH=$PYTHON_INSTALL_PATH_AMD64/bin:$PATH

-          echo "implementation=CPython" >> pyo3.config
           echo "version=3.10" >> pyo3.config
+          echo "implementation=CPython" >> pyo3.config
@@ -212,7 +217,7 @@ jobs:
     name: Build docker image
     needs: [build]
     runs-on: ubuntu-latest
-    if: github.repository == 'GreptimeTeam/greptimedb'
+    if: github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'workflow_dispatch'
     steps:
       - name: Checkout sources
         uses: actions/checkout@v3
@@ -252,9 +257,8 @@ jobs:

       - name: Unzip the amd64 artifacts
         run: |
-          cd amd64
-          tar xvf greptime-linux-amd64-pyo3.tgz
-          rm greptime-linux-amd64-pyo3.tgz
+          tar xvf amd64/greptime-linux-amd64-pyo3.tgz -C amd64/ && rm amd64/greptime-linux-amd64-pyo3.tgz
           cp -r amd64 docker/ci

       - name: Download arm64 binary
         id: download-arm64
@@ -267,15 +271,14 @@ jobs:
         id: unzip-arm64
         if: success() || steps.download-arm64.conclusion == 'success'
         run: |
-          cd arm64
-          tar xvf greptime-linux-arm64-pyo3.tgz
-          rm greptime-linux-arm64-pyo3.tgz
+          tar xvf arm64/greptime-linux-arm64-pyo3.tgz -C arm64/ && rm arm64/greptime-linux-arm64-pyo3.tgz
           cp -r arm64 docker/ci

       - name: Build and push all
         uses: docker/build-push-action@v3
         if: success() || steps.unzip-arm64.conclusion == 'success' # Build and push all platform if unzip-arm64 succeeds
         with:
-          context: .
+          context: ./docker/ci/
           file: ./docker/ci/Dockerfile
           push: true
           platforms: linux/amd64,linux/arm64
@@ -287,7 +290,7 @@ jobs:
         uses: docker/build-push-action@v3
         if: success() || steps.download-arm64.conclusion == 'failure' # Only build and push amd64 platform if download-arm64 fails
         with:
-          context: .
+          context: ./docker/ci/
           file: ./docker/ci/Dockerfile
           push: true
           platforms: linux/amd64
@@ -300,7 +303,7 @@ jobs:
     # Release artifacts only when all the artifacts are built successfully.
     needs: [build,docker]
     runs-on: ubuntu-latest
-    if: github.repository == 'GreptimeTeam/greptimedb'
+    if: github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'workflow_dispatch'
     steps:
       - name: Checkout sources
         uses: actions/checkout@v3
@@ -338,49 +341,3 @@ jobs:
         name: "Release ${{ github.ref_name }}"
         files: |
           **/greptime-*
-
-  docker-push-uhub:
-    name: Push docker image to UCloud Container Registry
-    needs: [docker]
-    runs-on: ubuntu-latest
-    if: github.repository == 'GreptimeTeam/greptimedb'
-    # Push to uhub may fail(500 error), but we don't want to block the release process. The failed job will be retried manually.
-    continue-on-error: true
-    steps:
-      - name: Checkout sources
-        uses: actions/checkout@v3
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Login to UCloud Container Registry
-        uses: docker/login-action@v2
-        with:
-          registry: uhub.service.ucloud.cn
-          username: ${{ secrets.UCLOUD_USERNAME }}
-          password: ${{ secrets.UCLOUD_PASSWORD }}
-
-      - name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
-        shell: bash
-        if: github.event_name == 'schedule'
-        run: |
-          buildTime=`date "+%Y%m%d"`
-          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
-          echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
-
-      - name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
-        shell: bash
-        if: github.event_name != 'schedule'
-        run: |
-          VERSION=${{ github.ref_name }}
-          echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV
-
-      - name: Push image to uhub # Use 'docker buildx imagetools create' to create a new image base on source image.
-        run: |
-          docker buildx imagetools create \
-            --tag uhub.service.ucloud.cn/greptime/greptimedb:latest \
-            --tag uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }} \
-            greptime/greptimedb:${{ env.IMAGE_TAG }}
4  .gitignore  vendored
@@ -35,3 +35,7 @@ benchmarks/data

 # dotenv
 .env
+
+# dashboard files
+!/src/servers/dashboard/VERSION
+/src/servers/dashboard/*
@@ -51,7 +51,7 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
 - To ensure that the community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA), which will be incorporated in the pull request process.
 - Make sure all your code is formatted and follows the [coding style](https://pingcap.github.io/style-guide/rust/).
 - Make sure all unit tests pass (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
-- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
+- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).

 #### `pre-commit` Hooks
1542  Cargo.lock  generated

File diff suppressed because it is too large.
31  Cargo.toml
@@ -24,6 +24,7 @@ members = [
     "src/common/time",
     "src/datanode",
     "src/datatypes",
+    "src/file-table-engine",
     "src/frontend",
     "src/log-store",
     "src/meta-client",

@@ -46,38 +47,38 @@ members = [
 ]

 [workspace.package]
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 license = "Apache-2.0"

 [workspace.dependencies]
-arrow = { version = "34.0" }
-arrow-array = "34.0"
-arrow-flight = "34.0"
-arrow-schema = { version = "34.0", features = ["serde"] }
+arrow = { version = "37.0" }
+arrow-array = "37.0"
+arrow-flight = "37.0"
+arrow-schema = { version = "37.0", features = ["serde"] }
 async-stream = "0.3"
 async-trait = "0.1"
 chrono = { version = "0.4", features = ["serde"] }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
-datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
-datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
-datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
-datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "146a949218ec970784974137277cde3b4e547d0a" }
+datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
+datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
+datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
+datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
+datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
+datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "74a778ca6016a853a3c3add3fa8c6f12f4fe4561" }
 futures = "0.3"
 futures-util = "0.3"
-parquet = "34.0"
+parquet = "37.0"
 paste = "1.0"
 prost = "0.11"
 rand = "0.8"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 snafu = { version = "0.7", features = ["backtraces"] }
-sqlparser = "0.32"
+sqlparser = "0.33"
 tempfile = "3"
 tokio = { version = "1.24.2", features = ["full"] }
-tokio-util = "0.7"
-tonic = { version = "0.8", features = ["tls"] }
+tokio-util = { version = "0.7", features = ["io-util"] }
+tonic = { version = "0.9", features = ["tls"] }
 uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }

 [profile.release]
6  Makefile
@@ -21,6 +21,10 @@ fmt: ## Format all the Rust code.

 .PHONY: fmt-toml
 fmt-toml: ## Format all TOML files.
	taplo format --option "indent_string= "

+.PHONY: check-toml
+check-toml: ## Check all TOML files.
+	taplo format --check --option "indent_string= "
+
 .PHONY: docker-image

@@ -47,7 +51,7 @@ check: ## Cargo check all the targets.

 .PHONY: clippy
 clippy: ## Check clippy rules.
-	cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
+	cargo clippy --workspace --all-targets -- -D warnings

 .PHONY: fmt-check
 fmt-check: ## Check code format.
17  README.md
@@ -1,14 +1,14 @@
 <p align="center">
   <picture>
-    <source media="(prefers-color-scheme: light)" srcset="/docs/logo-text-padding.png">
-    <source media="(prefers-color-scheme: dark)" srcset="/docs/logo-text-padding-dark.png">
-    <img alt="GreptimeDB Logo" src="/docs/logo-text-padding.png" width="400px">
+    <source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
+    <source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
+    <img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
   </picture>
 </p>

 <h3 align="center">
-  The next-generation hybrid timeseries/analytics processing database in the cloud
+  The next-generation hybrid time-series/analytics processing database in the cloud
 </h3>

 <p align="center">

@@ -23,6 +23,8 @@
   <a href="https://twitter.com/greptime"><img src="https://img.shields.io/badge/twitter-follow_us-1d9bf0.svg"></a>
+
+  <a href="https://www.linkedin.com/company/greptime/"><img src="https://img.shields.io/badge/linkedin-connect_with_us-0a66c2.svg"></a>
   <a href="https://greptime.com/slack"><img src="https://img.shields.io/badge/slack-GreptimeDB-0abd59?logo=slack" alt="slack" /></a>
 </p>

 ## What is GreptimeDB

@@ -36,11 +38,11 @@ Our core developers have been building time-series data platform
 for years. Based on their best-practices, GreptimeDB is born to give you:

 - A standalone binary that scales to highly-available distributed cluster, providing a transparent experience for cluster users
-- Optimized columnar layout for handling time-series data; compacted, compressed, stored on various storage backends
-- Flexible index options, tackling high cardinality issues down
+- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends
+- Flexible indexes, tackling high cardinality issues down
 - Distributed, parallel query execution, leveraging elastic computing resource
 - Native SQL, and Python scripting for advanced analytical scenarios
-- Widely adopted database protocols and APIs
+- Widely adopted database protocols and APIs, native PromQL supports
 - Extensible table engine architecture for extensive workloads

 ## Quick Start

@@ -158,6 +160,7 @@ You can always cleanup test database by removing `/tmp/greptimedb`.
 - GreptimeDB [User Guide](https://docs.greptime.com/user-guide/concepts.html)
 - GreptimeDB [Developer
   Guide](https://docs.greptime.com/developer-guide/overview.html)
+- GreptimeDB [internal code document](https://greptimedb.rs)

 ### Dashboard
 - [The dashboard UI for GreptimeDB](https://github.com/GreptimeTeam/dashboard)
19  SECURITY.md  Normal file
@@ -0,0 +1,19 @@
# Security Policy

## Supported Versions

| Version | Supported |
| ------- | ------------------ |
| >= v0.1.0 | :white_check_mark: |
| < v0.1.0 | :x: |

## Reporting a Vulnerability

We place great importance on the security of GreptimeDB code, software,
and cloud platform. If you come across a security vulnerability in GreptimeDB,
we kindly request that you inform us immediately. We will thoroughly investigate
all valid reports and make every effort to resolve the issue promptly.

To report any issues or vulnerabilities, please email us at info@greptime.com, rather than
posting publicly on GitHub. Be sure to provide us with the version identifier as well as details
on how the vulnerability can be exploited.
@@ -126,12 +126,13 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {

     for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
         let (values, datatype) = build_values(array);

         let column = Column {
-            column_name: field.name().to_owned(),
+            column_name: field.name().clone(),
             values: Some(values),
             null_mask: array
-                .data()
-                .null_bitmap()
+                .to_data()
+                .nulls()
                 .map(|bitmap| bitmap.buffer().as_slice().to_vec())
                 .unwrap_or_default(),
             datatype: datatype.into(),

@@ -182,10 +183,10 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
             let values = array.values();
             (
                 Values {
-                    i64_values: values.to_vec(),
+                    ts_microsecond_values: values.to_vec(),
                     ..Default::default()
                 },
-                ColumnDataType::Int64,
+                ColumnDataType::TimestampMicrosecond,
             )
         }
         DataType::Utf8 => {

@@ -224,7 +225,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
         | DataType::FixedSizeList(_, _)
         | DataType::LargeList(_)
         | DataType::Struct(_)
-        | DataType::Union(_, _, _)
+        | DataType::Union(_, _)
         | DataType::Dictionary(_, _)
         | DataType::Decimal128(_, _)
         | DataType::Decimal256(_, _)

@@ -252,13 +253,13 @@ fn create_table_expr() -> CreateTableExpr {
         },
         ColumnDef {
             name: "tpep_pickup_datetime".to_string(),
-            datatype: ColumnDataType::Int64 as i32,
+            datatype: ColumnDataType::TimestampMicrosecond as i32,
             is_nullable: true,
             default_constraint: vec![],
         },
         ColumnDef {
             name: "tpep_dropoff_datetime".to_string(),
-            datatype: ColumnDataType::Int64 as i32,
+            datatype: ColumnDataType::TimestampMicrosecond as i32,
             is_nullable: true,
             default_constraint: vec![],
         },

@@ -365,6 +366,7 @@ fn create_table_expr() -> CreateTableExpr {
         table_options: Default::default(),
         region_ids: vec![0],
+        table_id: None,
         engine: "mito".to_string(),
     }
 }
@@ -37,11 +37,21 @@ type = "File"
 data_dir = "/tmp/greptimedb/data/"

 # Compaction options, see `standalone.example.toml`.
-[compaction]
+[storage.compaction]
 max_inflight_tasks = 4
 max_files_in_level0 = 8
 max_purge_tasks = 32

+# Storage manifest options
+[storage.manifest]
+# Region checkpoint actions margin.
+# Create a checkpoint every <checkpoint_margin> actions.
+checkpoint_margin = 10
+# Region manifest logs and checkpoints gc execution duration
+gc_duration = '30s'
+# Whether to try creating a manifest checkpoint on region opening
+checkpoint_on_startup = false
+
 # Procedure storage options, see `standalone.example.toml`.
 # [procedure.store]
 # type = "File"
@@ -99,7 +99,7 @@ type = "File"
 data_dir = "/tmp/greptimedb/data/"

 # Compaction options.
-[compaction]
+[storage.compaction]
 # Max task number that can concurrently run.
 max_inflight_tasks = 4
 # Max files in level 0 to trigger compaction.

@@ -107,6 +107,16 @@ max_files_in_level0 = 8
 # Max task number for SST purge task after compaction.
 max_purge_tasks = 32

+# Storage manifest options
+[storage.manifest]
+# Region checkpoint actions margin.
+# Create a checkpoint every <checkpoint_margin> actions.
+checkpoint_margin = 10
+# Region manifest logs and checkpoints gc execution duration
+gc_duration = '30s'
+# Whether to try creating a manifest checkpoint on region opening
+checkpoint_on_startup = false
+
 # Procedure storage options.
 # Uncomment to enable.
 # [procedure.store]
@@ -6,7 +6,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
     python3.10-dev \
     python3-pip

-RUN python3 -m pip install pyarrow
+COPY requirements.txt /etc/greptime/requirements.txt
+
+RUN python3 -m pip install -r /etc/greptime/requirements.txt

 ARG TARGETARCH
5  docker/ci/requirements.txt  Normal file
@@ -0,0 +1,5 @@
numpy>=1.24.2
pandas>=1.5.3
pyarrow>=11.0.0
requests>=2.28.2
scipy>=1.10.1
74  docs/how-to/how-to-implement-sql-statement.md  Normal file
@@ -0,0 +1,74 @@
This document introduces how to implement SQL statements in GreptimeDB.

The execution entry point for SQL statements is located in the Frontend Instance. You can see it has
implemented `SqlQueryHandler`:

```rust
impl SqlQueryHandler for Instance {
    type Error = Error;

    async fn do_query(&self, query: &str, query_ctx: QueryContextRef) -> Vec<Result<Output>> {
        // ...
    }
}
```

Normally, when a SQL query arrives at GreptimeDB, the `do_query` method is called. After some parsing work, the SQL
is fed into `StatementExecutor`:

```rust
// in Frontend Instance:
self.statement_executor.execute_sql(stmt, query_ctx).await
```

That's where we handle our SQL statements. You can just create a new match arm for your statement there, and the
statement is then implemented for both GreptimeDB Standalone and Cluster. You can see how `DESCRIBE TABLE` is implemented as
an example.
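
To make that dispatch shape concrete, here is a minimal, self-contained sketch of the pattern; the `Statement` variants, the `ShowVersion` statement, and the executor are stand-ins invented for illustration, not GreptimeDB's actual definitions:

```rust
// Toy model of the StatementExecutor dispatch described above; every
// name here is an illustrative stand-in.
enum Statement {
    DescribeTable(String),
    // A new kind of statement gets its own variant...
    ShowVersion,
}

struct StatementExecutor;

impl StatementExecutor {
    fn execute_sql(&self, stmt: Statement) -> String {
        match stmt {
            Statement::DescribeTable(table) => format!("DESCRIBE {table}"),
            // ...and a new match arm here; once the statement is handled
            // in this match, it works for both Standalone and Cluster.
            Statement::ShowVersion => "greptimedb 0.2.0".to_string(),
        }
    }
}

fn main() {
    let executor = StatementExecutor;
    println!("{}", executor.execute_sql(Statement::ShowVersion));
}
```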

Now, what if a statement should be handled differently for GreptimeDB Standalone and Cluster? You can see there's
a `SqlStatementExecutor` field in `StatementExecutor`. GreptimeDB Standalone and Cluster each have their own implementation
of `SqlStatementExecutor`. If you are going to implement a statement differently in the two modes
(like `CREATE TABLE`), you have to implement it in each mode's own `SqlStatementExecutor`, as in the sketch below.
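
Under the same caveat as above, a rough sketch of that per-mode split: a shared trait with one implementation per mode, which the executor would hold behind a trait object. The trait and both implementations are hypothetical stand-ins:

```rust
// Toy model of the per-mode SqlStatementExecutor split; all names are
// illustrative stand-ins, not GreptimeDB's real types.
trait SqlStatementExecutor {
    fn create_table(&self, name: &str) -> String;
}

// Plays the role of the Datanode Instance implementation (Standalone).
struct StandaloneExecutor;
// Plays the role of the Frontend DistInstance implementation (Cluster).
struct ClusterExecutor;

impl SqlStatementExecutor for StandaloneExecutor {
    fn create_table(&self, name: &str) -> String {
        format!("created `{name}` in the local table engine")
    }
}

impl SqlStatementExecutor for ClusterExecutor {
    fn create_table(&self, name: &str) -> String {
        format!("created `{name}` with regions distributed over datanodes")
    }
}

fn main() {
    // The deployment mode decides which implementation gets plugged in:
    let executor: Box<dyn SqlStatementExecutor> = Box::new(StandaloneExecutor);
    println!("{}", executor.create_table("metrics"));
}
```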

This is summarized in the diagram below:

```text
                    SQL query
                        |
                        v
          +---------------------------+
          | SqlQueryHandler::do_query |
          +---------------------------+
                        |
                        | SQL parsing
                        v
        +--------------------------------+
        | StatementExecutor::execute_sql |
        +--------------------------------+
                        |
                        | SQL execution
                        v
       +----------------------------------+
       | commonly handled statements like |
       | "plan_exec" for selection or     |
       +----------------------------------+
                 |               |
  For Standalone |               | For Cluster
                 v               v
+---------------------------+ +---------------------------+
| SqlStatementExecutor impl | | SqlStatementExecutor impl |
| in Datanode Instance      | | in Frontend DistInstance  |
+---------------------------+ +---------------------------+
```

Note that some SQL statements can be executed by our QueryEngine, in the form of a `LogicalPlan`. You can follow the
invocation path down to the `QueryEngine` implementation from `StatementExecutor::plan_exec`. For now, there's only
one `DatafusionQueryEngine` for both GreptimeDB Standalone and Cluster. That lone query engine works for both modes
because GreptimeDB reads and writes data through the `Table` trait, and each mode has its own `Table` implementation.

We don't have any bias towards whether statements should be handled in the query engine or in `StatementExecutor`. You
can implement one kind of statement in both places. For example, `Insert` with selection is handled in the query engine,
because we can easily do the query part there. However, `Insert` without selection is not, because the cost of parsing the
statement into a `LogicalPlan` is not negligible. So generally, if the SQL query is simple enough, you can handle it
in `StatementExecutor`; otherwise, if it is complex or contains some form of selection, it should be parsed into a
`LogicalPlan` and handled in the query engine.
(Two binary image files added with this change, not shown: 78 KiB and 47 KiB.)
174  docs/rfcs/2023-03-29-file-external-table/rfc.md  Normal file
@@ -0,0 +1,174 @@
---
Feature Name: "File external table"
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/1041
Date: 2023-03-08
Author: "Xu Wenkang <wenymedia@gmail.com>"
---

File external table

---

# Summary

Allows users to perform SQL queries on files.

# Motivation

User data may already exist in other storage (e.g., file systems or S3) in CSV, Parquet, JSON, or other formats. We can provide users the ability to perform SQL queries on these files.

# Details

## Overview

The file external table provides users the ability to perform SQL queries on these files.

For example, say a user has a CSV file on the local file system at `/var/data/city.csv`:

```
Rank , Name , State , 2023 Population , 2020 Census , Annual Change , Density (mi²)
1 , New York City , New York , 8,992,908 , 8,804,190 , 0.7% , 29,938
2 , Los Angeles , California , 3,930,586 , 3,898,747 , 0.27% , 8,382
3 , Chicago , Illinois , 2,761,625 , 2,746,388 , 0.18% , 12,146
.....
```

Then the user can create a file external table with:

```sql
CREATE EXTERNAL TABLE city with(location='/var/data/city.csv', format="CSV", field_delimiter = ',', record_delimiter = '\n', skip_header = 1);
```

Then query the external table with:

```bash
MySQL> select * from city;
```

| Rank | Name | State | 2023 Population | 2020 Census | Annual Change | Density (mi²) |
| :--- | :------------ | :--------- | :-------------- | :---------- | :------------ | :------------ |
| 1 | New York City | New York | 8,992,908 | 8,804,190 | 0.7% | 29,938 |
| 2 | Los Angeles | California | 3,930,586 | 3,898,747 | 0.27% | 8,382 |
| 3 | Chicago | Illinois | 2,761,625 | 2,746,388 | 0.18% | 12,146 |

Drop the external table, if needed, with:

```sql
DROP EXTERNAL TABLE city
```

### Syntax

```
CREATE EXTERNAL TABLE [<database>.]<table_name>
[
  (
    <col_name> <col_type> [NULL | NOT NULL] [COMMENT "<comment>"]
  )
]
[ WITH
  (
    LOCATION = 'url'
    [,FIELD_DELIMITER = 'delimiter' ]
    [,RECORD_DELIMITER = 'delimiter' ]
    [,SKIP_HEADER = '<number>' ]
    [,FORMAT = { csv | json | parquet } ]
    [,PATTERN = '<regex_pattern>' ]
    [,ENDPOINT = '<uri>' ]
    [,ACCESS_KEY_ID = '<key_id>' ]
    [,SECRET_ACCESS_KEY = '<access_key>' ]
    [,SESSION_TOKEN = '<token>' ]
    [,REGION = '<region>' ]
    [,ENABLE_VIRTUAL_HOST_STYLE = '<boolean>']
    ..
  )
]
```

### Supported File Formats

The external file table supports multiple formats; we divide them into row formats and columnar formats.

Row formats:

- CSV, JSON

Columnar formats:

- Parquet

Some of these formats support filter pushdown and others don't. If a user queries a very large file in a format that doesn't support pushdown, scanning the full file may consume a lot of IO and cause a long-running query.

### File Table Engine

(figure: file table engine architecture — binary image not shown)

We implement a file table engine that creates an external table by accepting user-specified file paths and treating all records as immutable.

1. File Format Decoder: decodes files into a `RecordBatch` stream (see the sketch below).
2. File Table Engine: implements the `TableProvider` trait, stores the necessary metadata in memory, and provides scan ability.

Our implementation is better suited for small files. For large files (e.g., a GB-level CSV file), we suggest that users import the data into the database instead.
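
As a rough illustration of the decoder step, here is a minimal, self-contained sketch that reads a CSV file into plain column vectors standing in for a `RecordBatch`; the `Batch` struct, the two-column schema, and the file path are invented for this example and are not the file table engine's actual code:

```rust
use std::error::Error;
use std::fs;

/// Stand-in for an Arrow RecordBatch: two decoded columns.
struct Batch {
    ranks: Vec<i64>,
    names: Vec<String>,
}

/// Decode a CSV file like the `city.csv` example above, skipping
/// `skip_header` leading lines and keeping only the first two fields.
fn decode_csv(path: &str, skip_header: usize) -> Result<Batch, Box<dyn Error>> {
    let text = fs::read_to_string(path)?;
    let mut batch = Batch { ranks: Vec::new(), names: Vec::new() };
    for line in text.lines().skip(skip_header) {
        let mut fields = line.split(',').map(str::trim);
        batch.ranks.push(fields.next().ok_or("missing rank")?.parse::<i64>()?);
        batch.names.push(fields.next().ok_or("missing name")?.to_string());
    }
    Ok(batch)
}

fn main() -> Result<(), Box<dyn Error>> {
    let batch = decode_csv("/var/data/city.csv", 1)?;
    println!("decoded {} rows", batch.ranks.len());
    Ok(())
}
```

A real decoder would stream fixed-size batches instead of reading the whole file into memory, which is exactly why large files are better imported into the database.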

## Drawbacks

- Some formats don't support filter pushdown
- Hard to support indexing

## Life cycle

### Register a table

1. Write metadata to the manifest.
2. Create the table via the file table engine.
3. Register the table to `CatalogProvider` and to `SystemCatalog` (which persists tables to disk).

### Deregister a table (Drop a table)

1. Fetch the target table info (figure out the table engine type).
2. Deregister the target table in `CatalogProvider` and `SystemCatalog`.
3. Find the target table engine.
4. Drop the target table.

### Recover a table when restarting

1. Collect table names and engine type info.
2. Find the target tables in the different engines.
3. Open and register the tables.

# Alternatives

## Using the DataFusion API

We can use the DataFusion API to register a file table:

```rust
let ctx = SessionContext::new();

ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;

// create a plan
let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?;
```

### Drawbacks

DataFusion implements its own `Object Store` abstraction and supports parsing partitioned directories, which lets it push filters down and skip some directories. However, this makes it impossible to use our `LruCacheLayer` (the parsing of partitioned directories requires paths as input). If we want to manage memory entirely ourselves, we should implement our own `TableProvider` or `Table`.

- Impossible to use `CacheLayer`

## Introduce an intermediate representation layer

(figure: intermediate representation layer — binary image not shown)

We convert all files into `parquet` as an intermediate representation. Then we only need to implement a `parquet` file table engine, and we already have a similar one. Also, it supports limited filter pushdown via the `parquet` row group stats.

### Drawbacks

- Computing overhead
- Storage overhead
39  scripts/fetch-dashboard-assets.sh  Executable file
@@ -0,0 +1,39 @@
#!/usr/bin/env bash

# This script is used to download built dashboard assets from the "GreptimeTeam/dashboard" repository.

set -e

declare -r SCRIPT_DIR=$(cd $(dirname ${0}) >/dev/null 2>&1 && pwd)
declare -r ROOT_DIR=$(dirname ${SCRIPT_DIR})
declare -r STATIC_DIR="$ROOT_DIR/src/servers/dashboard"

RELEASE_VERSION="$(cat $STATIC_DIR/VERSION)"

# Download the SHA256 checksum attached to the release. To verify the integrity
# of the download, this checksum will be used to check the download tar file
# containing the built dashboard assets.
curl -Ls https://github.com/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/sha256.txt --output sha256.txt

# Download the tar file containing the built dashboard assets.
curl -L https://github.com/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz --output build.tar.gz

# Verify the checksums match; exit if they don't.
case "$(uname -s)" in
    FreeBSD | Darwin)
        echo "$(cat sha256.txt)" | shasum --algorithm 256 --check \
            || { echo "Checksums did not match for downloaded dashboard assets!"; exit 1; } ;;
    Linux)
        echo "$(cat sha256.txt)" | sha256sum --check -- \
            || { echo "Checksums did not match for downloaded dashboard assets!"; exit 1; } ;;
    *)
        echo "The '$(uname -s)' operating system is not supported as a build host for the dashboard" >&2
        exit 1
esac

# Extract the assets and clean up.
tar -xzf build.tar.gz -C "$STATIC_DIR"
rm sha256.txt
rm build.tar.gz

echo "Successfully downloaded dashboard assets to $STATIC_DIR"
@@ -10,10 +10,10 @@ common-base = { path = "../common/base" }
 common-error = { path = "../common/error" }
 common-time = { path = "../common/time" }
 datatypes = { path = "../datatypes" }
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "eb760d219206c77dd3a105ecb6a3ba97d9d650ec" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0bebe5f69c91cdfbce85cb8f45f9fcd28185261c" }
 prost.workspace = true
 snafu = { version = "0.7", features = ["backtraces"] }
 tonic.workspace = true

 [build-dependencies]
-tonic-build = "0.8"
+tonic-build = "0.9"
@@ -18,7 +18,7 @@ use common_error::ext::ErrorExt;
 use common_error::prelude::StatusCode;
 use datatypes::prelude::ConcreteDataType;
 use snafu::prelude::*;
-use snafu::{Backtrace, ErrorCompat};
+use snafu::Location;

 pub type Result<T> = std::result::Result<T, Error>;

@@ -26,12 +26,12 @@ pub type Result<T> = std::result::Result<T, Error>;
 #[snafu(visibility(pub))]
 pub enum Error {
     #[snafu(display("Unknown proto column datatype: {}", datatype))]
-    UnknownColumnDataType { datatype: i32, backtrace: Backtrace },
+    UnknownColumnDataType { datatype: i32, location: Location },

     #[snafu(display("Failed to create column datatype from {:?}", from))]
     IntoColumnDataType {
         from: ConcreteDataType,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display(

@@ -66,9 +66,6 @@ impl ErrorExt for Error {
             | Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(),
         }
     }
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }

     fn as_any(&self) -> &dyn Any {
         self
@@ -7,6 +7,7 @@ license.workspace = true
 [dependencies]
 api = { path = "../api" }
 arc-swap = "1.0"
+arrow-schema.workspace = true
 async-stream.workspace = true
 async-trait = "0.1"
 backoff = { version = "0.4", features = ["tokio"] }
15  src/catalog/src/datafusion.rs  Normal file
@@ -0,0 +1,15 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod catalog_adapter;
@@ -18,10 +18,6 @@ use std::any::Any;
 use std::sync::Arc;

 use async_trait::async_trait;
-use catalog::error::{self as catalog_error, Error};
-use catalog::{
-    CatalogListRef, CatalogProvider, CatalogProviderRef, SchemaProvider, SchemaProviderRef,
-};
 use common_error::prelude::BoxedError;
 use datafusion::catalog::catalog::{
     CatalogList as DfCatalogList, CatalogProvider as DfCatalogProvider,

@@ -33,7 +29,10 @@ use snafu::ResultExt;
 use table::table::adapter::{DfTableProviderAdapter, TableAdapter};
 use table::TableRef;

-use crate::datafusion::error;
+use crate::error::{self, Result, SchemaProviderOperationSnafu};
+use crate::{
+    CatalogListRef, CatalogProvider, CatalogProviderRef, SchemaProvider, SchemaProviderRef,
+};

 pub struct DfCatalogListAdapter {
     catalog_list: CatalogListRef,

@@ -89,7 +88,7 @@ impl CatalogProvider for CatalogProviderAdapter {
         self
     }

-    fn schema_names(&self) -> catalog::error::Result<Vec<String>> {
+    fn schema_names(&self) -> Result<Vec<String>> {
         Ok(self.df_catalog_provider.schema_names())
     }

@@ -97,11 +96,11 @@ impl CatalogProvider for CatalogProviderAdapter {
         &self,
         _name: String,
         _schema: SchemaProviderRef,
-    ) -> catalog::error::Result<Option<SchemaProviderRef>> {
+    ) -> Result<Option<SchemaProviderRef>> {
         todo!("register_schema is not supported in Datafusion catalog provider")
     }

-    fn schema(&self, name: &str) -> catalog::error::Result<Option<Arc<dyn SchemaProvider>>> {
+    fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
         Ok(self
             .df_catalog_provider
             .schema(name)

@@ -196,11 +195,11 @@ impl SchemaProvider for SchemaProviderAdapter {
     }

     /// Retrieves the list of available table names in this schema.
-    fn table_names(&self) -> Result<Vec<String>, Error> {
+    fn table_names(&self) -> Result<Vec<String>> {
         Ok(self.df_schema_provider.table_names())
     }

-    async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
+    async fn table(&self, name: &str) -> Result<Option<TableRef>> {
         let table = self.df_schema_provider.table(name).await;
         let table = table.map(|table_provider| {
             match table_provider

@@ -219,11 +218,7 @@ impl SchemaProvider for SchemaProviderAdapter {
         Ok(table)
     }

-    fn register_table(
-        &self,
-        name: String,
-        table: TableRef,
-    ) -> catalog::error::Result<Option<TableRef>> {
+    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
         let table_provider = Arc::new(DfTableProviderAdapter::new(table.clone()));
         Ok(self
             .df_schema_provider

@@ -232,43 +227,43 @@ impl SchemaProvider for SchemaProviderAdapter {
                 msg: "Fail to register table to datafusion",
             })
             .map_err(BoxedError::new)
-            .context(catalog_error::SchemaProviderOperationSnafu)?
+            .context(SchemaProviderOperationSnafu)?
             .map(|_| table))
     }

-    fn rename_table(&self, _name: &str, _new_name: String) -> catalog_error::Result<TableRef> {
+    fn rename_table(&self, _name: &str, _new_name: String) -> Result<TableRef> {
         todo!()
     }

-    fn deregister_table(&self, name: &str) -> catalog::error::Result<Option<TableRef>> {
+    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
         self.df_schema_provider
             .deregister_table(name)
             .context(error::DatafusionSnafu {
                 msg: "Fail to deregister table from datafusion",
             })
             .map_err(BoxedError::new)
-            .context(catalog_error::SchemaProviderOperationSnafu)?
+            .context(SchemaProviderOperationSnafu)?
             .map(|table| {
                 let adapter = TableAdapter::new(table)
                     .context(error::TableSchemaMismatchSnafu)
                     .map_err(BoxedError::new)
-                    .context(catalog_error::SchemaProviderOperationSnafu)?;
+                    .context(SchemaProviderOperationSnafu)?;
                 Ok(Arc::new(adapter) as _)
             })
             .transpose()
     }

-    fn table_exist(&self, name: &str) -> Result<bool, Error> {
+    fn table_exist(&self, name: &str) -> Result<bool> {
         Ok(self.df_schema_provider.table_exist(name))
     }
 }

 #[cfg(test)]
 mod tests {
-    use catalog::local::{new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider};
     use table::table::numbers::NumbersTable;

     use super::*;
+    use crate::local::{new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider};

     #[test]
     #[should_panic]
@@ -19,7 +19,7 @@ use common_error::ext::{BoxedError, ErrorExt};
 use common_error::prelude::{Snafu, StatusCode};
 use datafusion::error::DataFusionError;
 use datatypes::prelude::ConcreteDataType;
-use snafu::{Backtrace, ErrorCompat};
+use snafu::Location;

 use crate::DeregisterTableRequest;

@@ -50,7 +50,7 @@ pub enum Error {
     },

     #[snafu(display("System catalog is not valid: {}", msg))]
-    SystemCatalog { msg: String, backtrace: Backtrace },
+    SystemCatalog { msg: String, location: Location },

     #[snafu(display(
         "System catalog table type mismatch, expected: binary, found: {:?}",

@@ -58,61 +58,68 @@ pub enum Error {
     ))]
     SystemCatalogTypeMismatch {
         data_type: ConcreteDataType,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
     InvalidEntryType {
         entry_type: Option<u8>,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Invalid system catalog key: {:?}", key))]
     InvalidKey {
         key: Option<String>,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Catalog value is not present"))]
-    EmptyValue { backtrace: Backtrace },
+    EmptyValue { location: Location },

     #[snafu(display("Failed to deserialize value, source: {}", source))]
     ValueDeserialize {
         source: serde_json::error::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

+    #[snafu(display("Table engine not found: {}, source: {}", engine_name, source))]
+    TableEngineNotFound {
+        engine_name: String,
+        #[snafu(backtrace)]
+        source: table::error::Error,
+    },
+
     #[snafu(display("Cannot find catalog by name: {}", catalog_name))]
     CatalogNotFound {
         catalog_name: String,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Cannot find schema {} in catalog {}", schema, catalog))]
     SchemaNotFound {
         catalog: String,
         schema: String,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Table `{}` already exists", table))]
-    TableExists { table: String, backtrace: Backtrace },
+    TableExists { table: String, location: Location },

     #[snafu(display("Table `{}` not exist", table))]
-    TableNotExist { table: String, backtrace: Backtrace },
+    TableNotExist { table: String, location: Location },

     #[snafu(display("Schema {} already exists", schema))]
-    SchemaExists {
-        schema: String,
-        backtrace: Backtrace,
-    },
+    SchemaExists { schema: String, location: Location },

     #[snafu(display("Operation {} not implemented yet", operation))]
     Unimplemented {
         operation: String,
-        backtrace: Backtrace,
+        location: Location,
     },

+    #[snafu(display("Operation {} not supported", op))]
+    NotSupported { op: String, location: Location },
+
     #[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
     OpenTable {
         table_info: String,

@@ -123,7 +130,7 @@ pub enum Error {
     #[snafu(display("Table not found while opening table, table info: {}", table_info))]
     TableNotFound {
         table_info: String,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to read system catalog table records"))]

@@ -132,6 +139,12 @@ pub enum Error {
         source: common_recordbatch::error::Error,
     },

+    #[snafu(display("Failed to create recordbatch, source: {}", source))]
+    CreateRecordBatch {
+        #[snafu(backtrace)]
+        source: common_recordbatch::error::Error,
+    },
+
     #[snafu(display(
         "Failed to insert table creation record to system catalog, source: {}",
         source

@@ -153,7 +166,7 @@ pub enum Error {
     },

     #[snafu(display("Illegal catalog manager state: {}", msg))]
-    IllegalManagerState { backtrace: Backtrace, msg: String },
+    IllegalManagerState { location: Location, msg: String },

     #[snafu(display("Failed to scan system catalog table, source: {}", source))]
     SystemCatalogTableScan {

@@ -219,6 +232,22 @@ pub enum Error {
         #[snafu(backtrace)]
         source: table::error::Error,
     },

+    #[snafu(display("Invalid system table definition: {err_msg}"))]
+    InvalidSystemTableDef { err_msg: String, location: Location },
+
+    #[snafu(display("{}: {}", msg, source))]
+    Datafusion {
+        msg: String,
+        source: DataFusionError,
+        location: Location,
+    },
+
+    #[snafu(display("Table schema mismatch, source: {}", source))]
+    TableSchemaMismatch {
+        #[snafu(backtrace)]
+        source: table::error::Error,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;

@@ -231,7 +260,8 @@ impl ErrorExt for Error {
             | Error::TableNotFound { .. }
             | Error::IllegalManagerState { .. }
             | Error::CatalogNotFound { .. }
-            | Error::InvalidEntryType { .. } => StatusCode::Unexpected,
+            | Error::InvalidEntryType { .. }
+            | Error::InvalidSystemTableDef { .. } => StatusCode::Unexpected,

             Error::SystemCatalog { .. }
             | Error::EmptyValue { .. }

@@ -239,14 +269,18 @@ impl ErrorExt for Error {

             Error::SystemCatalogTypeMismatch { .. } => StatusCode::Internal,

-            Error::ReadSystemCatalog { source, .. } => source.status_code(),
+            Error::ReadSystemCatalog { source, .. } | Error::CreateRecordBatch { source } => {
+                source.status_code()
+            }
             Error::InvalidCatalogValue { source, .. } | Error::CatalogEntrySerde { source } => {
                 source.status_code()
             }

             Error::TableExists { .. } => StatusCode::TableAlreadyExists,
             Error::TableNotExist { .. } => StatusCode::TableNotFound,
-            Error::SchemaExists { .. } => StatusCode::InvalidArguments,
+            Error::SchemaExists { .. } | Error::TableEngineNotFound { .. } => {
+                StatusCode::InvalidArguments
+            }

             Error::OpenSystemCatalog { source, .. }
             | Error::CreateSystemCatalog { source, .. }

@@ -254,7 +288,8 @@ impl ErrorExt for Error {
             | Error::OpenTable { source, .. }
             | Error::CreateTable { source, .. }
             | Error::DeregisterTable { source, .. }
-            | Error::RegionStats { source, .. } => source.status_code(),
+            | Error::RegionStats { source, .. }
+            | Error::TableSchemaMismatch { source } => source.status_code(),

             Error::MetaSrv { source, .. } => source.status_code(),
             Error::SystemCatalogTableScan { source } => source.status_code(),

@@ -264,15 +299,12 @@ impl ErrorExt for Error {
                 source.status_code()
             }

-            Error::Unimplemented { .. } => StatusCode::Unsupported,
+            Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported,
             Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
+            Error::Datafusion { .. } => StatusCode::EngineExecuteQuery,
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }

@@ -296,7 +328,7 @@ mod tests {
             StatusCode::TableAlreadyExists,
             Error::TableExists {
                 table: "some_table".to_string(),
-                backtrace: Backtrace::generate(),
+                location: Location::generate(),
             }
             .status_code()
         );

@@ -310,7 +342,7 @@ mod tests {
             StatusCode::StorageUnavailable,
             Error::SystemCatalog {
                 msg: "".to_string(),
-                backtrace: Backtrace::generate(),
+                location: Location::generate(),
             }
             .status_code()
         );

@@ -319,7 +351,7 @@ mod tests {
             StatusCode::Internal,
             Error::SystemCatalogTypeMismatch {
                 data_type: ConcreteDataType::binary_datatype(),
-                backtrace: Backtrace::generate(),
+                location: Location::generate(),
             }
             .status_code()
         );

@@ -333,7 +365,7 @@ mod tests {
     pub fn test_errors_to_datafusion_error() {
         let e: DataFusionError = Error::TableExists {
             table: "test_table".to_string(),
-            backtrace: Backtrace::generate(),
+            location: Location::generate(),
         }
         .into();
         match e {
80  src/catalog/src/information_schema.rs  Normal file
@@ -0,0 +1,80 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod tables;

use std::any::Any;
use std::sync::Arc;

use async_trait::async_trait;
use datafusion::datasource::streaming::{PartitionStream, StreamingTable};
use snafu::ResultExt;
use table::table::adapter::TableAdapter;
use table::TableRef;

use crate::error::{DatafusionSnafu, Result, TableSchemaMismatchSnafu};
use crate::information_schema::tables::InformationSchemaTables;
use crate::{CatalogProviderRef, SchemaProvider};

const TABLES: &str = "tables";

pub(crate) struct InformationSchemaProvider {
    catalog_name: String,
    catalog_provider: CatalogProviderRef,
}

impl InformationSchemaProvider {
    pub(crate) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
        Self {
            catalog_name,
            catalog_provider,
        }
    }
}

#[async_trait]
impl SchemaProvider for InformationSchemaProvider {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn table_names(&self) -> Result<Vec<String>> {
        Ok(vec![TABLES.to_string()])
    }

    async fn table(&self, name: &str) -> Result<Option<TableRef>> {
        let table = if name.eq_ignore_ascii_case(TABLES) {
            Arc::new(InformationSchemaTables::new(
                self.catalog_name.clone(),
                self.catalog_provider.clone(),
            ))
        } else {
            return Ok(None);
        };

        let table = Arc::new(
            StreamingTable::try_new(table.schema().clone(), vec![table]).with_context(|_| {
                DatafusionSnafu {
                    msg: format!("Failed to get InformationSchema table '{name}'"),
                }
            })?,
        );
        let table = TableAdapter::new(table).context(TableSchemaMismatchSnafu)?;
        Ok(Some(Arc::new(table)))
    }

    fn table_exist(&self, name: &str) -> Result<bool> {
        Ok(matches!(name.to_ascii_lowercase().as_str(), TABLES))
    }
}
165  src/catalog/src/information_schema/tables.rs  Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
use common_query::physical_plan::TaskContext;
use common_recordbatch::RecordBatch;
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::StringVectorBuilder;
use snafu::ResultExt;
use table::metadata::TableType;

use crate::error::{CreateRecordBatchSnafu, Result};
use crate::information_schema::TABLES;
use crate::CatalogProviderRef;

pub(super) struct InformationSchemaTables {
    schema: SchemaRef,
    catalog_name: String,
    catalog_provider: CatalogProviderRef,
}

impl InformationSchemaTables {
    pub(super) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
        let schema = Arc::new(Schema::new(vec![
            ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
            ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
            ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
            ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
        ]));
        Self {
            schema,
            catalog_name,
            catalog_provider,
        }
    }

    fn builder(&self) -> InformationSchemaTablesBuilder {
        InformationSchemaTablesBuilder::new(
            self.schema.clone(),
            self.catalog_name.clone(),
            self.catalog_provider.clone(),
        )
    }
}

/// Builds the `information_schema.TABLES` table row by row
///
/// Columns are based on <https://www.postgresql.org/docs/current/infoschema-columns.html>
struct InformationSchemaTablesBuilder {
    schema: SchemaRef,
    catalog_name: String,
    catalog_provider: CatalogProviderRef,

    catalog_names: StringVectorBuilder,
    schema_names: StringVectorBuilder,
    table_names: StringVectorBuilder,
    table_types: StringVectorBuilder,
}

impl InformationSchemaTablesBuilder {
    fn new(schema: SchemaRef, catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
        Self {
            schema,
            catalog_name,
            catalog_provider,
            catalog_names: StringVectorBuilder::with_capacity(42),
            schema_names: StringVectorBuilder::with_capacity(42),
            table_names: StringVectorBuilder::with_capacity(42),
            table_types: StringVectorBuilder::with_capacity(42),
        }
    }

    /// Construct the `information_schema.tables` virtual table
    async fn make_tables(&mut self) -> Result<RecordBatch> {
        let catalog_name = self.catalog_name.clone();

        for schema_name in self.catalog_provider.schema_names()? {
            if schema_name == INFORMATION_SCHEMA_NAME {
                continue;
            }

            let Some(schema) = self.catalog_provider.schema(&schema_name)? else { continue };
            for table_name in schema.table_names()? {
                let Some(table) = schema.table(&table_name).await? else { continue };
                self.add_table(&catalog_name, &schema_name, &table_name, table.table_type());
            }
        }

        // Add a final list for the information schema tables themselves
        self.add_table(
            &catalog_name,
            INFORMATION_SCHEMA_NAME,
            TABLES,
            TableType::View,
        );

        self.finish()
    }

    fn add_table(
        &mut self,
        catalog_name: &str,
        schema_name: &str,
        table_name: &str,
        table_type: TableType,
    ) {
        self.catalog_names.push(Some(catalog_name));
        self.schema_names.push(Some(schema_name));
        self.table_names.push(Some(table_name));
        self.table_types.push(Some(match table_type {
            TableType::Base => "BASE TABLE",
            TableType::View => "VIEW",
            TableType::Temporary => "LOCAL TEMPORARY",
        }));
    }

    fn finish(&mut self) -> Result<RecordBatch> {
        let columns: Vec<VectorRef> = vec![
            Arc::new(self.catalog_names.finish()),
            Arc::new(self.schema_names.finish()),
            Arc::new(self.table_names.finish()),
            Arc::new(self.table_types.finish()),
        ];
        RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
    }
}

impl DfPartitionStream for InformationSchemaTables {
    fn schema(&self) -> &ArrowSchemaRef {
        self.schema.arrow_schema()
    }

    fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
        let schema = self.schema().clone();
        let mut builder = self.builder();
        Box::pin(DfRecordBatchStreamAdapter::new(
            schema,
            futures::stream::once(async move {
                builder
                    .make_tables()
                    .await
                    .map(|x| x.into_df_record_batch())
                    .map_err(Into::into)
            }),
        ))
    }
}
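The `execute` implementation above defers all the work into `futures::stream::once`, so the whole `information_schema.tables` content is built lazily as a single record batch on first poll. A self-contained sketch of that one-shot stream shape, using only the `futures` crate (the string payload stands in for a `RecordBatch`):

use futures::{executor::block_on, stream, StreamExt};

fn main() {
    // The future runs on first poll; afterwards the stream is exhausted.
    let mut batches = Box::pin(stream::once(async { "the only batch" }));
    block_on(async {
        assert_eq!(batches.next().await, Some("the only batch"));
        assert_eq!(batches.next().await, None);
    });
}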
@@ -20,7 +20,7 @@ use std::sync::Arc;

use api::v1::meta::{RegionStat, TableName};
use common_telemetry::{info, warn};
-use snafu::{OptionExt, ResultExt};
+use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::CreateTableRequest;
@@ -29,8 +29,10 @@ use table::TableRef;
use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};

pub mod datafusion;
pub mod error;
pub mod helper;
+pub(crate) mod information_schema;
pub mod local;
pub mod remote;
pub mod schema;
@@ -228,34 +230,25 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(

/// The stat of regions in the datanode node.
/// The number of regions can be got from len of vec.
-pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> Result<(u64, Vec<RegionStat>)> {
+///
+/// Ignores any errors that occur while iterating over regions. The intention of this method is to
+/// collect region stats that will be carried in Datanode's heartbeat to Metasrv, so it's a
+/// "try our best" job.
+pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> (u64, Vec<RegionStat>) {
    let mut region_number: u64 = 0;
    let mut region_stats = Vec::new();

-    for catalog_name in catalog_manager.catalog_names()? {
-        let catalog =
-            catalog_manager
-                .catalog(&catalog_name)?
-                .context(error::CatalogNotFoundSnafu {
-                    catalog_name: &catalog_name,
-                })?;
+    let Ok(catalog_names) = catalog_manager.catalog_names() else { return (region_number, region_stats) };
+    for catalog_name in catalog_names {
+        let Ok(Some(catalog)) = catalog_manager.catalog(&catalog_name) else { continue };

-        for schema_name in catalog.schema_names()? {
-            let schema = catalog
-                .schema(&schema_name)?
-                .context(error::SchemaNotFoundSnafu {
-                    catalog: &catalog_name,
-                    schema: &schema_name,
-                })?;
+        let Ok(schema_names) = catalog.schema_names() else { continue };
+        for schema_name in schema_names {
+            let Ok(Some(schema)) = catalog.schema(&schema_name) else { continue };

-            for table_name in schema.table_names()? {
-                let table =
-                    schema
-                        .table(&table_name)
-                        .await?
-                        .context(error::TableNotFoundSnafu {
-                            table_info: &table_name,
-                        })?;
+            let Ok(table_names) = schema.table_names() else { continue };
+            for table_name in table_names {
+                let Ok(Some(table)) = schema.table(&table_name).await else { continue };

                let region_numbers = &table.table_info().meta.region_numbers;
                region_number += region_numbers.len() as u64;
@@ -282,6 +275,5 @@ pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> Result<(u64,
            }
        }
    }

-    Ok((region_number, region_stats))
+    (region_number, region_stats)
}
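The rewritten `datanode_stat` swaps `?` for `let Ok(..) = .. else { continue }` so that one broken catalog, schema, or table no longer aborts the whole stat collection. A self-contained sketch of the same best-effort iteration pattern (the `parse_all` helper is hypothetical, not from the codebase):

// Collect what we can; skip entries that fail instead of propagating the error.
fn parse_all(inputs: &[&str]) -> (u64, Vec<u64>) {
    let mut count: u64 = 0;
    let mut values = Vec::new();
    for s in inputs {
        let Ok(n) = s.parse::<u64>() else { continue };
        count += 1;
        values.push(n);
    }
    (count, values)
}

fn main() {
    assert_eq!(parse_all(&["1", "oops", "3"]), (2, vec![1, 3]));
}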
@@ -18,7 +18,7 @@ use std::sync::Arc;

use common_catalog::consts::{
    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
-    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
+    MITO_ENGINE, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_catalog::format_full_table_name;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
@@ -27,7 +27,8 @@ use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
use snafu::{ensure, OptionExt, ResultExt};
-use table::engine::{EngineContext, TableEngineRef};
+use table::engine::manager::TableEngineManagerRef;
+use table::engine::EngineContext;
use table::metadata::TableId;
use table::requests::OpenTableRequest;
use table::table::numbers::NumbersTable;
@@ -37,7 +38,8 @@ use table::TableRef;
use crate::error::{
    self, CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
    Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
-    SystemCatalogTypeMismatchSnafu, TableExistsSnafu, TableNotFoundSnafu,
+    SystemCatalogTypeMismatchSnafu, TableEngineNotFoundSnafu, TableExistsSnafu, TableNotExistSnafu,
+    TableNotFoundSnafu,
};
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
@@ -55,7 +57,7 @@ use crate::{
pub struct LocalCatalogManager {
    system: Arc<SystemCatalog>,
    catalogs: Arc<MemoryCatalogManager>,
-    engine: TableEngineRef,
+    engine_manager: TableEngineManagerRef,
    next_table_id: AtomicU32,
    init_lock: Mutex<bool>,
    register_lock: Mutex<()>,
@@ -63,19 +65,20 @@ pub struct LocalCatalogManager {
}

impl LocalCatalogManager {
-    /// Create a new [CatalogManager] with given user catalogs and table engine
-    pub async fn try_new(engine: TableEngineRef) -> Result<Self> {
+    /// Create a new [CatalogManager] with given user catalogs and mito engine
+    pub async fn try_new(engine_manager: TableEngineManagerRef) -> Result<Self> {
+        let engine = engine_manager
+            .engine(MITO_ENGINE)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: MITO_ENGINE,
+            })?;
        let table = SystemCatalogTable::new(engine.clone()).await?;
        let memory_catalog_list = crate::local::memory::new_memory_catalog_list()?;
-        let system_catalog = Arc::new(SystemCatalog::new(
-            table,
-            memory_catalog_list.clone(),
-            engine.clone(),
-        ));
+        let system_catalog = Arc::new(SystemCatalog::new(table));
        Ok(Self {
            system: system_catalog,
            catalogs: memory_catalog_list,
-            engine,
+            engine_manager,
            next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
            init_lock: Mutex::new(false),
            register_lock: Mutex::new(()),
@@ -100,7 +103,14 @@ impl LocalCatalogManager {

        // Processing system table hooks
        let mut sys_table_requests = self.system_table_requests.lock().await;
-        handle_system_table_request(self, self.engine.clone(), &mut sys_table_requests).await?;
+        let engine = self
+            .engine_manager
+            .engine(MITO_ENGINE)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: MITO_ENGINE,
+            })?;
+
+        handle_system_table_request(self, engine, &mut sys_table_requests).await?;
        Ok(())
    }
@@ -253,9 +263,14 @@ impl LocalCatalogManager {
                    table_name: t.table_name.clone(),
                    table_id: t.table_id,
                };
+                let engine = self
+                    .engine_manager
+                    .engine(&t.engine)
+                    .context(TableEngineNotFoundSnafu {
+                        engine_name: &t.engine,
+                    })?;

-                let option = self
-                    .engine
+                let option = engine
                    .open_table(&context, request)
                    .await
                    .with_context(|_| OpenTableSnafu {
@@ -290,9 +305,7 @@ impl CatalogList for LocalCatalogManager {
    }

    fn catalog_names(&self) -> Result<Vec<String>> {
-        let mut res = self.catalogs.catalog_names()?;
-        res.push(SYSTEM_CATALOG_NAME.to_string());
-        Ok(res)
+        self.catalogs.catalog_names()
    }

    fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
@@ -364,6 +377,7 @@ impl CatalogManager for LocalCatalogManager {
            // Try to register table with same table id, just ignore.
            Ok(false)
        } else {
+            let engine = request.table.table_info().meta.engine.to_string();
            // table does not exist
            self.system
                .register_table(
@@ -371,6 +385,7 @@ impl CatalogManager for LocalCatalogManager {
                    schema_name.clone(),
                    request.table_name.clone(),
                    request.table_id,
+                    engine,
                )
                .await?;
            schema.register_table(request.table_name, request.table)?;
@@ -404,6 +419,21 @@ impl CatalogManager for LocalCatalogManager {
                schema: schema_name,
            })?;

+        let _lock = self.register_lock.lock().await;
+        ensure!(
+            !schema.table_exist(&request.new_table_name)?,
+            TableExistsSnafu {
+                table: &request.new_table_name
+            }
+        );
+        let old_table = schema
+            .table(&request.table_name)
+            .await?
+            .context(TableNotExistSnafu {
+                table: &request.table_name,
+            })?;
+
+        let engine = old_table.table_info().meta.engine.to_string();
+        // rename table in system catalog
        self.system
            .register_table(
@@ -411,11 +441,14 @@ impl CatalogManager for LocalCatalogManager {
                schema_name.clone(),
                request.new_table_name.clone(),
                request.table_id,
+                engine,
            )
            .await?;
-        Ok(schema
-            .rename_table(&request.table_name, request.new_table_name)
-            .is_ok())
+
+        let renamed = schema
+            .rename_table(&request.table_name, request.new_table_name.clone())
+            .is_ok();
+        Ok(renamed)
    }

    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
@@ -530,6 +563,8 @@ impl CatalogManager for LocalCatalogManager {
mod tests {
    use std::assert_matches::assert_matches;

+    use mito::engine::MITO_ENGINE;
+
    use super::*;
    use crate::system::{CatalogEntry, SchemaEntry};
@@ -541,6 +576,7 @@ mod tests {
                schema_name: "S1".to_string(),
                table_name: "T1".to_string(),
                table_id: 1,
+                engine: MITO_ENGINE.to_string(),
            }),
            Entry::Catalog(CatalogEntry {
                catalog_name: "C2".to_string(),
@@ -561,6 +597,7 @@ mod tests {
                schema_name: "S1".to_string(),
                table_name: "T2".to_string(),
                table_id: 2,
+                engine: MITO_ENGINE.to_string(),
            }),
        ];
        let res = LocalCatalogManager::sort_entries(vec);
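The recurring change in this file is the move from a single hard-wired `TableEngineRef` to a `TableEngineManagerRef` that resolves engines by name, failing with `TableEngineNotFoundSnafu` for unknown names. A minimal stand-in for that registry shape (names and types here are illustrative, not the real `table::engine::manager` API):

use std::collections::HashMap;
use std::sync::Arc;

trait Engine {
    fn name(&self) -> &str;
}

struct Mito;
impl Engine for Mito {
    fn name(&self) -> &str {
        "mito"
    }
}

struct EngineManager {
    engines: HashMap<String, Arc<dyn Engine>>,
}

impl EngineManager {
    // Resolve by name instead of assuming one global engine.
    fn engine(&self, name: &str) -> Result<Arc<dyn Engine>, String> {
        self.engines
            .get(name)
            .cloned()
            .ok_or_else(|| format!("table engine '{name}' not found"))
    }
}

fn main() {
    let engines = HashMap::from([("mito".to_string(), Arc::new(Mito) as Arc<dyn Engine>)]);
    let manager = EngineManager { engines };
    assert!(manager.engine("mito").is_ok());
    assert!(manager.engine("parquet").is_err());
}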
@@ -324,16 +324,20 @@ impl SchemaProvider for MemorySchemaProvider {

    fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef> {
        let mut tables = self.tables.write().unwrap();
-        if tables.get(name).is_some() {
-            let table = tables.remove(name).unwrap();
-            tables.insert(new_name, table.clone());
-            Ok(table)
-        } else {
-            TableNotFoundSnafu {
+        let Some(table) = tables.remove(name) else {
+            return TableNotFoundSnafu {
                table_info: name.to_string(),
            }
-            .fail()?
-        }
+            .fail()?;
+        };
+        let e = match tables.entry(new_name) {
+            Entry::Vacant(e) => e,
+            Entry::Occupied(e) => {
+                return TableExistsSnafu { table: e.key() }.fail();
+            }
+        };
+        e.insert(table.clone());
+        Ok(table)
    }

    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
@@ -396,7 +400,6 @@ mod tests {
        let other_table = NumbersTable::new(12);
        let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
        let err = result.err().unwrap();
-        assert!(err.backtrace_opt().is_some());
        assert_eq!(StatusCode::TableAlreadyExists, err.status_code());
    }
@@ -460,7 +463,7 @@ mod tests {
        assert!(schema.table_exist(table_name).unwrap());

        // rename table
-        let new_table_name = "numbers";
+        let new_table_name = "numbers_new";
        let rename_table_req = RenameTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
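The reworked `rename_table` takes the entry out first and then claims the destination key through the `Entry` API, so an occupied target is rejected under the same write lock. The same shape with a plain `HashMap` (the `rename` helper is hypothetical; note that, as in the patch, the source entry has already been removed when the collision is reported):

use std::collections::hash_map::Entry;
use std::collections::HashMap;

fn rename(map: &mut HashMap<String, i32>, from: &str, to: &str) -> Result<i32, String> {
    let Some(value) = map.remove(from) else {
        return Err(format!("'{from}' not found"));
    };
    let slot = match map.entry(to.to_string()) {
        Entry::Vacant(slot) => slot,
        Entry::Occupied(slot) => return Err(format!("'{}' already exists", slot.key())),
    };
    slot.insert(value);
    Ok(value)
}

fn main() {
    let mut m = HashMap::from([("a".to_string(), 1), ("b".to_string(), 2)]);
    assert_eq!(rename(&mut m, "a", "c"), Ok(1));
    assert!(rename(&mut m, "c", "b").is_err());
}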
@@ -20,23 +20,25 @@ use std::sync::Arc;

use arc_swap::ArcSwap;
use async_stream::stream;
use async_trait::async_trait;
-use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
+use common_catalog::consts::{
+    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID, MITO_ENGINE,
+};
use common_telemetry::{debug, error, info};
use dashmap::DashMap;
use futures::Stream;
use futures_util::StreamExt;
use parking_lot::RwLock;
use snafu::{OptionExt, ResultExt};
-use table::engine::{EngineContext, TableEngineRef};
+use table::engine::manager::TableEngineManagerRef;
+use table::engine::EngineContext;
use table::metadata::TableId;
use table::requests::{CreateTableRequest, OpenTableRequest};
use table::table::numbers::NumbersTable;
use table::TableRef;
use tokio::sync::Mutex;

use crate::error::{
    CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, OpenTableSnafu, Result,
-    SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
+    SchemaNotFoundSnafu, TableEngineNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
};
use crate::helper::{
    build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
@@ -55,14 +57,14 @@ pub struct RemoteCatalogManager {
    node_id: u64,
    backend: KvBackendRef,
    catalogs: Arc<RwLock<DashMap<String, CatalogProviderRef>>>,
-    engine: TableEngineRef,
+    engine_manager: TableEngineManagerRef,
    system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
}

impl RemoteCatalogManager {
-    pub fn new(engine: TableEngineRef, node_id: u64, backend: KvBackendRef) -> Self {
+    pub fn new(engine_manager: TableEngineManagerRef, node_id: u64, backend: KvBackendRef) -> Self {
        Self {
-            engine,
+            engine_manager,
            node_id,
            backend,
            catalogs: Default::default(),
@@ -186,7 +188,9 @@ impl RemoteCatalogManager {
        let max_table_id = MIN_USER_TABLE_ID - 1;

        // initiate default catalog and schema
-        let default_catalog = self.initiate_default_catalog().await?;
+        let default_catalog = self
+            .create_catalog_and_schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
+            .await?;
        res.insert(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
        info!("Default catalog and schema registered");
@@ -266,13 +270,19 @@ impl RemoteCatalogManager {
        Ok(())
    }

-    async fn initiate_default_catalog(&self) -> Result<CatalogProviderRef> {
-        let default_catalog = self.new_catalog_provider(DEFAULT_CATALOG_NAME);
-        let default_schema = self.new_schema_provider(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
-        default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())?;
+    pub async fn create_catalog_and_schema(
+        &self,
+        catalog_name: &str,
+        schema_name: &str,
+    ) -> Result<CatalogProviderRef> {
+        let schema_provider = self.new_schema_provider(catalog_name, schema_name);
+
+        let catalog_provider = self.new_catalog_provider(catalog_name);
+        catalog_provider.register_schema(schema_name.to_string(), schema_provider.clone())?;

        let schema_key = SchemaKey {
-            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
-            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
+            catalog_name: catalog_name.to_string(),
+            schema_name: schema_name.to_string(),
        }
        .to_string();
        self.backend
@@ -283,10 +293,10 @@ impl RemoteCatalogManager {
            .context(InvalidCatalogValueSnafu)?,
        )
        .await?;
-        info!("Registered default schema");
+        info!("Created schema '{schema_key}'");

        let catalog_key = CatalogKey {
-            catalog_name: DEFAULT_CATALOG_NAME.to_string(),
+            catalog_name: catalog_name.to_string(),
        }
        .to_string();
        self.backend
@@ -297,8 +307,8 @@ impl RemoteCatalogManager {
            .context(InvalidCatalogValueSnafu)?,
        )
        .await?;
-        info!("Registered default catalog");
-        Ok(default_catalog)
+        info!("Created catalog '{catalog_key}'");
+        Ok(catalog_provider)
    }

    async fn open_or_create_table(
@@ -331,8 +341,13 @@ impl RemoteCatalogManager {
            table_name: table_name.clone(),
            table_id,
        };
-        match self
-            .engine
+        let engine = self
+            .engine_manager
+            .engine(&table_info.meta.engine)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: &table_info.meta.engine,
+            })?;
+        match engine
            .open_table(&context, request)
            .await
            .with_context(|_| OpenTableSnafu {
@@ -363,9 +378,10 @@ impl RemoteCatalogManager {
            primary_key_indices: meta.primary_key_indices.clone(),
            create_if_not_exists: true,
            table_options: meta.options.clone(),
+            engine: engine.name().to_string(),
        };

-        self.engine
+        engine
            .create_table(&context, req)
            .await
            .context(CreateTableSnafu {
@@ -398,17 +414,14 @@ impl CatalogManager for RemoteCatalogManager {
        info!("Max table id allocated: {}", max_table_id);

        let mut system_table_requests = self.system_table_requests.lock().await;
-        handle_system_table_request(self, self.engine.clone(), &mut system_table_requests).await?;
+        let engine = self
+            .engine_manager
+            .engine(MITO_ENGINE)
+            .context(TableEngineNotFoundSnafu {
+                engine_name: MITO_ENGINE,
+            })?;
+        handle_system_table_request(self, engine, &mut system_table_requests).await?;
        info!("All system table opened");

-        self.catalog(DEFAULT_CATALOG_NAME)
-            .unwrap()
-            .unwrap()
-            .schema(DEFAULT_SCHEMA_NAME)
-            .unwrap()
-            .unwrap()
-            .register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
-            .unwrap();
        Ok(())
    }
@@ -18,7 +18,7 @@ use std::sync::Arc;

use async_trait::async_trait;
use table::TableRef;

-use crate::error::Result;
+use crate::error::{NotSupportedSnafu, Result};

/// Represents a schema, comprising a number of named tables.
#[async_trait]
@@ -35,15 +35,30 @@ pub trait SchemaProvider: Sync + Send {

    /// If supported by the implementation, adds a new table to this schema.
    /// If a table of the same name existed before, it returns "Table already exists" error.
-    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>>;
+    fn register_table(&self, name: String, _table: TableRef) -> Result<Option<TableRef>> {
+        NotSupportedSnafu {
+            op: format!("register_table({name}, <table>)"),
+        }
+        .fail()
+    }

    /// If supported by the implementation, renames an existing table from this schema and returns it.
    /// If no table of that name exists, returns "Table not found" error.
-    fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef>;
+    fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef> {
+        NotSupportedSnafu {
+            op: format!("rename_table({name}, {new_name})"),
+        }
+        .fail()
+    }

    /// If supported by the implementation, removes an existing table from this schema and returns it.
    /// If no table of that name exists, returns Ok(None).
-    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>>;
+    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
+        NotSupportedSnafu {
+            op: format!("deregister_table({name})"),
+        }
+        .fail()
+    }

    /// If supported by the implementation, checks the table exist in the schema provider or not.
    /// If no matched table in the schema provider, return false.
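Turning the three mutating methods into default implementations means read-only `SchemaProvider`s (like the new information-schema provider) no longer have to stub them out; they inherit a uniform "not supported" error instead. A self-contained sketch of the idiom:

// Optional capability expressed as a defaulted trait method.
trait Provider {
    fn name(&self) -> &'static str;

    fn register(&self, item: &str) -> Result<(), String> {
        Err(format!("{}: register({item}) is not supported", self.name()))
    }
}

struct ReadOnly;

impl Provider for ReadOnly {
    fn name(&self) -> &'static str {
        "read-only"
    }
    // No `register` override needed: the default refuses politely.
}

fn main() {
    assert!(ReadOnly.register("x").is_err());
}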
@@ -17,8 +17,8 @@ use std::collections::HashMap;
use std::sync::Arc;

use common_catalog::consts::{
-    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME,
-    SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
+    DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE,
+    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
};
use common_query::logical_plan::Expr;
use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
@@ -112,6 +112,7 @@ impl SystemCatalogTable {
            primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
            create_if_not_exists: true,
            table_options: TableOptions::default(),
+            engine: engine.name().to_string(),
        };

        let table = engine
@@ -194,12 +195,13 @@ pub fn build_table_insert_request(
    schema: String,
    table_name: String,
    table_id: TableId,
+    engine: String,
) -> InsertRequest {
    let entry_key = format_table_entry_key(&catalog, &schema, table_id);
    build_insert_request(
        EntryType::Table,
        entry_key.as_bytes(),
-        serde_json::to_string(&TableEntryValue { table_name })
+        serde_json::to_string(&TableEntryValue { table_name, engine })
            .unwrap()
            .as_bytes(),
    )
@@ -330,6 +332,7 @@ pub fn decode_system_catalog(
            schema_name: table_parts[1].to_string(),
            table_name: table_meta.table_name,
            table_id,
+            engine: table_meta.engine,
        }))
    }
}
@@ -385,11 +388,19 @@ pub struct TableEntry {
    pub schema_name: String,
    pub table_name: String,
    pub table_id: TableId,
+    pub engine: String,
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct TableEntryValue {
    pub table_name: String,

+    #[serde(default = "mito_engine")]
+    pub engine: String,
}

+fn mito_engine() -> String {
+    MITO_ENGINE.to_string()
+}
+
#[cfg(test)]
@@ -399,8 +410,8 @@ mod tests {
    use datatypes::value::Value;
    use log_store::NoopLogStore;
    use mito::config::EngineConfig;
-    use mito::engine::MitoEngine;
-    use object_store::{ObjectStore, ObjectStoreBuilder};
+    use mito::engine::{MitoEngine, MITO_ENGINE};
+    use object_store::ObjectStore;
    use storage::compaction::noop::NoopCompactionScheduler;
    use storage::config::EngineConfig as StorageEngineConfig;
    use storage::EngineImpl;
@@ -482,11 +493,9 @@ mod tests {
    pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
        let dir = create_temp_dir("system-table-test");
        let store_dir = dir.path().to_string_lossy();
-        let accessor = object_store::services::Fs::default()
-            .root(&store_dir)
-            .build()
-            .unwrap();
-        let object_store = ObjectStore::new(accessor).finish();
+        let mut builder = object_store::services::Fs::default();
+        builder.root(&store_dir);
+        let object_store = ObjectStore::new(builder).unwrap().finish();
        let noop_compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
        let table_engine = Arc::new(MitoEngine::new(
            EngineConfig::default(),
@@ -530,6 +539,7 @@ mod tests {
            DEFAULT_SCHEMA_NAME.to_string(),
            "my_table".to_string(),
            1,
+            MITO_ENGINE.to_string(),
        );
        let result = catalog_table.insert(table_insertion).await.unwrap();
        assert_eq!(result, 1);
@@ -550,6 +560,7 @@ mod tests {
            schema_name: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "my_table".to_string(),
            table_id: 1,
+            engine: MITO_ENGINE.to_string(),
        });
        assert_eq!(entry, expected);
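The `#[serde(default = "mito_engine")]` attribute is what keeps old system-catalog entries readable: values serialized before the `engine` field existed simply fall back to `MITO_ENGINE` on deserialization. A self-contained sketch with `serde`/`serde_json` (struct and function names here are illustrative):

use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
struct TableValue {
    table_name: String,
    // Payloads written before this field existed fall back to the default.
    #[serde(default = "default_engine")]
    engine: String,
}

fn default_engine() -> String {
    "mito".to_string()
}

fn main() {
    // An old payload with no "engine" key still deserializes.
    let v: TableValue = serde_json::from_str(r#"{"table_name":"t"}"#).unwrap();
    assert_eq!(v.engine, "mito");
}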
@@ -15,8 +15,9 @@
use std::collections::HashMap;
use std::sync::Arc;

+use common_catalog::consts::INFORMATION_SCHEMA_NAME;
use common_catalog::format_full_table_name;
-use datafusion::common::{OwnedTableReference, ResolvedTableReference, TableReference};
+use datafusion::common::{ResolvedTableReference, TableReference};
use datafusion::datasource::provider_as_source;
use datafusion::logical_expr::TableSource;
use session::context::QueryContext;
@@ -26,6 +27,7 @@ use table::table::adapter::DfTableProviderAdapter;
use crate::error::{
    CatalogNotFoundSnafu, QueryAccessDeniedSnafu, Result, SchemaNotFoundSnafu, TableNotExistSnafu,
};
+use crate::information_schema::InformationSchemaProvider;
use crate::CatalogListRef;

pub struct DfTableSourceProvider {
@@ -87,9 +89,8 @@ impl DfTableSourceProvider {

    pub async fn resolve_table(
        &mut self,
-        table_ref: OwnedTableReference,
+        table_ref: TableReference<'_>,
    ) -> Result<Arc<dyn TableSource>> {
-        let table_ref = table_ref.as_table_reference();
        let table_ref = self.resolve_table_ref(table_ref)?;

        let resolved_name = table_ref.to_string();
@@ -101,14 +102,25 @@ impl DfTableSourceProvider {
        let schema_name = table_ref.schema.as_ref();
        let table_name = table_ref.table.as_ref();

-        let catalog = self
-            .catalog_list
-            .catalog(catalog_name)?
-            .context(CatalogNotFoundSnafu { catalog_name })?;
-        let schema = catalog.schema(schema_name)?.context(SchemaNotFoundSnafu {
-            catalog: catalog_name,
-            schema: schema_name,
-        })?;
+        let schema = if schema_name != INFORMATION_SCHEMA_NAME {
+            let catalog = self
+                .catalog_list
+                .catalog(catalog_name)?
+                .context(CatalogNotFoundSnafu { catalog_name })?;
+            catalog.schema(schema_name)?.context(SchemaNotFoundSnafu {
+                catalog: catalog_name,
+                schema: schema_name,
+            })?
+        } else {
+            let catalog_provider = self
+                .catalog_list
+                .catalog(catalog_name)?
+                .context(CatalogNotFoundSnafu { catalog_name })?;
+            Arc::new(InformationSchemaProvider::new(
+                catalog_name.to_string(),
+                catalog_provider,
+            ))
+        };
        let table = schema
            .table(table_name)
            .await?
@@ -15,28 +15,12 @@
// The `tables` table in system catalog keeps a record of all tables created by user.

use std::any::Any;
-use std::pin::Pin;
use std::sync::Arc;
-use std::task::{Context, Poll};

-use async_stream::stream;
use async_trait::async_trait;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
-use common_error::ext::BoxedError;
-use common_query::logical_plan::Expr;
-use common_query::physical_plan::PhysicalPlanRef;
-use common_recordbatch::error::Result as RecordBatchResult;
-use common_recordbatch::{RecordBatch, RecordBatchStream};
-use datatypes::prelude::{ConcreteDataType, DataType};
-use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::value::ValueRef;
-use datatypes::vectors::VectorRef;
-use futures::Stream;
use snafu::ResultExt;
-use table::engine::TableEngineRef;
-use table::error::TablesRecordBatchSnafu;
-use table::metadata::{TableId, TableInfoRef};
-use table::table::scan::SimpleTableScan;
+use table::metadata::TableId;
use table::{Table, TableRef};

use crate::error::{self, Error, InsertCatalogRecordSnafu, Result as CatalogResult};
@@ -44,160 +28,9 @@ use crate::system::{
    build_schema_insert_request, build_table_deletion_request, build_table_insert_request,
    SystemCatalogTable,
};
-use crate::{
-    CatalogListRef, CatalogProvider, DeregisterTableRequest, SchemaProvider, SchemaProviderRef,
-};
-
-/// Tables holds all tables created by user.
-pub struct Tables {
-    schema: SchemaRef,
-    catalogs: CatalogListRef,
-    engine_name: String,
-}
-
-impl Tables {
-    pub fn new(catalogs: CatalogListRef, engine_name: String) -> Self {
-        Self {
-            schema: Arc::new(build_schema_for_tables()),
-            catalogs,
-            engine_name,
-        }
-    }
-}
-
-#[async_trait::async_trait]
-impl Table for Tables {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-
-    fn table_info(&self) -> TableInfoRef {
-        unreachable!("Tables does not support table_info method")
-    }
-
-    async fn scan(
-        &self,
-        _projection: Option<&Vec<usize>>,
-        _filters: &[Expr],
-        _limit: Option<usize>,
-    ) -> table::error::Result<PhysicalPlanRef> {
-        let catalogs = self.catalogs.clone();
-        let schema_ref = self.schema.clone();
-        let engine_name = self.engine_name.clone();
-
-        let stream = stream!({
-            for catalog_name in catalogs
-                .catalog_names()
-                .map_err(BoxedError::new)
-                .context(TablesRecordBatchSnafu)?
-            {
-                let catalog = catalogs
-                    .catalog(&catalog_name)
-                    .map_err(BoxedError::new)
-                    .context(TablesRecordBatchSnafu)?
-                    .unwrap();
-                for schema_name in catalog
-                    .schema_names()
-                    .map_err(BoxedError::new)
-                    .context(TablesRecordBatchSnafu)?
-                {
-                    let mut tables_in_schema = Vec::with_capacity(
-                        catalog
-                            .schema_names()
-                            .map_err(BoxedError::new)
-                            .context(TablesRecordBatchSnafu)?
-                            .len(),
-                    );
-                    let schema = catalog
-                        .schema(&schema_name)
-                        .map_err(BoxedError::new)
-                        .context(TablesRecordBatchSnafu)?
-                        .unwrap();
-                    for table_name in schema
-                        .table_names()
-                        .map_err(BoxedError::new)
-                        .context(TablesRecordBatchSnafu)?
-                    {
-                        tables_in_schema.push(table_name);
-                    }
-
-                    let vec = tables_to_record_batch(
-                        &catalog_name,
-                        &schema_name,
-                        tables_in_schema,
-                        &engine_name,
-                    );
-                    let record_batch_res = RecordBatch::new(schema_ref.clone(), vec);
-                    yield record_batch_res;
-                }
-            }
-        });
-
-        let stream = Box::pin(TablesRecordBatchStream {
-            schema: self.schema.clone(),
-            stream: Box::pin(stream),
-        });
-        Ok(Arc::new(SimpleTableScan::new(stream)))
-    }
-}
-
-/// Convert tables info to `RecordBatch`.
-fn tables_to_record_batch(
-    catalog_name: &str,
-    schema_name: &str,
-    table_names: Vec<String>,
-    engine: &str,
-) -> Vec<VectorRef> {
-    let mut catalog_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-    let mut schema_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-    let mut table_name_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-    let mut engine_vec =
-        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
-
-    for table_name in table_names {
-        // Safety: All these vectors are string type.
-        catalog_vec.push_value_ref(ValueRef::String(catalog_name));
-        schema_vec.push_value_ref(ValueRef::String(schema_name));
-        table_name_vec.push_value_ref(ValueRef::String(&table_name));
-        engine_vec.push_value_ref(ValueRef::String(engine));
-    }
-
-    vec![
-        catalog_vec.to_vector(),
-        schema_vec.to_vector(),
-        table_name_vec.to_vector(),
-        engine_vec.to_vector(),
-    ]
-}
-
-pub struct TablesRecordBatchStream {
-    schema: SchemaRef,
-    stream: Pin<Box<dyn Stream<Item = RecordBatchResult<RecordBatch>> + Send>>,
-}
-
-impl Stream for TablesRecordBatchStream {
-    type Item = RecordBatchResult<RecordBatch>;
-
-    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        Pin::new(&mut self.stream).poll_next(cx)
-    }
-}
-
-impl RecordBatchStream for TablesRecordBatchStream {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
+use crate::{CatalogProvider, DeregisterTableRequest, SchemaProvider, SchemaProviderRef};

pub struct InformationSchema {
-    pub tables: Arc<Tables>,
    pub system: Arc<SystemCatalogTable>,
}

@@ -208,41 +41,19 @@ impl SchemaProvider for InformationSchema {
    }

    fn table_names(&self) -> Result<Vec<String>, Error> {
-        Ok(vec![
-            "tables".to_string(),
-            SYSTEM_CATALOG_TABLE_NAME.to_string(),
-        ])
+        Ok(vec![SYSTEM_CATALOG_TABLE_NAME.to_string()])
    }

    async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
-        if name.eq_ignore_ascii_case("tables") {
-            Ok(Some(self.tables.clone()))
-        } else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
+        if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
            Ok(Some(self.system.clone()))
        } else {
            Ok(None)
        }
    }

-    fn register_table(
-        &self,
-        _name: String,
-        _table: TableRef,
-    ) -> crate::error::Result<Option<TableRef>> {
-        panic!("System catalog & schema does not support register table")
-    }
-
-    fn rename_table(&self, _name: &str, _new_name: String) -> crate::error::Result<TableRef> {
-        unimplemented!("System catalog & schema does not support rename table")
-    }
-
-    fn deregister_table(&self, _name: &str) -> crate::error::Result<Option<TableRef>> {
-        panic!("System catalog & schema does not support deregister table")
-    }
-
    fn table_exist(&self, name: &str) -> Result<bool, Error> {
-        Ok(name.eq_ignore_ascii_case("tables")
-            || name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
+        Ok(name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
    }
}

@@ -251,13 +62,8 @@ pub struct SystemCatalog {
}

impl SystemCatalog {
-    pub fn new(
-        system: SystemCatalogTable,
-        catalogs: CatalogListRef,
-        engine: TableEngineRef,
-    ) -> Self {
+    pub(crate) fn new(system: SystemCatalogTable) -> Self {
        let schema = InformationSchema {
-            tables: Arc::new(Tables::new(catalogs, engine.name().to_string())),
            system: Arc::new(system),
        };
        Self {
@@ -271,8 +77,9 @@ impl SystemCatalog {
        schema: String,
        table_name: String,
        table_id: TableId,
+        engine: String,
    ) -> crate::error::Result<usize> {
-        let request = build_table_insert_request(catalog, schema, table_name, table_id);
+        let request = build_table_insert_request(catalog, schema, table_name, table_id, engine);
        self.information_schema
            .system
            .insert(request)
@@ -334,104 +141,3 @@ impl CatalogProvider for SystemCatalog {
        }
    }
}
-
-fn build_schema_for_tables() -> Schema {
-    let cols = vec![
-        ColumnSchema::new(
-            "catalog".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-        ColumnSchema::new(
-            "schema".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-        ColumnSchema::new(
-            "table_name".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-        ColumnSchema::new(
-            "engine".to_string(),
-            ConcreteDataType::string_datatype(),
-            false,
-        ),
-    ];
-    Schema::new(cols)
-}
-
-#[cfg(test)]
-mod tests {
-    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-    use common_query::physical_plan::SessionContext;
-    use futures_util::StreamExt;
-    use table::table::numbers::NumbersTable;
-
-    use super::*;
-    use crate::local::memory::new_memory_catalog_list;
-    use crate::CatalogList;
-
-    #[tokio::test]
-    async fn test_tables() {
-        let catalog_list = new_memory_catalog_list().unwrap();
-        let schema = catalog_list
-            .catalog(DEFAULT_CATALOG_NAME)
-            .unwrap()
-            .unwrap()
-            .schema(DEFAULT_SCHEMA_NAME)
-            .unwrap()
-            .unwrap();
-        schema
-            .register_table("test_table".to_string(), Arc::new(NumbersTable::default()))
-            .unwrap();
-
-        let tables = Tables::new(catalog_list, "test_engine".to_string());
-        let tables_stream = tables.scan(None, &[], None).await.unwrap();
-        let session_ctx = SessionContext::new();
-        let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();
-
-        if let Some(t) = tables_stream.next().await {
-            let batch = t.unwrap();
-            assert_eq!(1, batch.num_rows());
-            assert_eq!(4, batch.num_columns());
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(0).data_type()
-            );
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(1).data_type()
-            );
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(2).data_type()
-            );
-            assert_eq!(
-                ConcreteDataType::string_datatype(),
-                batch.column(3).data_type()
-            );
-            assert_eq!(
-                "greptime",
-                batch.column(0).get_ref(0).as_string().unwrap().unwrap()
-            );
-
-            assert_eq!(
-                "public",
-                batch.column(1).get_ref(0).as_string().unwrap().unwrap()
-            );
-
-            assert_eq!(
-                "test_table",
-                batch.column(2).get_ref(0).as_string().unwrap().unwrap()
-            );
-
-            assert_eq!(
-                "test_engine",
-                batch.column(3).get_ref(0).as_string().unwrap().unwrap()
-            );
-        } else {
-            panic!("Record batch should not be empty!")
-        }
-    }
-}
@@ -21,6 +21,7 @@ mod tests {
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_telemetry::{error, info};
    use mito::config::EngineConfig;
+    use table::engine::manager::MemoryTableEngineManager;
    use table::table::numbers::NumbersTable;
    use table::TableRef;
    use tokio::sync::Mutex;
@@ -33,7 +34,8 @@ mod tests {
            mito::table::test_util::MockEngine::default(),
            object_store,
        ));
-        let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
+        let engine_manager = Arc::new(MemoryTableEngineManager::new(mock_engine.clone()));
+        let catalog_manager = LocalCatalogManager::try_new(engine_manager).await.unwrap();
        catalog_manager.start().await?;
        Ok(catalog_manager)
    }
@@ -27,9 +27,10 @@ mod tests {
        KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
    };
    use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
-    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
    use datatypes::schema::RawSchema;
    use futures_util::StreamExt;
+    use table::engine::manager::MemoryTableEngineManager;
    use table::engine::{EngineContext, TableEngineRef};
    use table::requests::CreateTableRequest;

@@ -80,8 +81,11 @@ mod tests {
    ) -> (KvBackendRef, TableEngineRef, Arc<RemoteCatalogManager>) {
        let backend = Arc::new(MockKvBackend::default()) as KvBackendRef;
        let table_engine = Arc::new(MockTableEngine::default());
-        let catalog_manager =
-            RemoteCatalogManager::new(table_engine.clone(), node_id, backend.clone());
+        let engine_manager = Arc::new(MemoryTableEngineManager::alias(
+            MITO_ENGINE.to_string(),
+            table_engine.clone(),
+        ));
+        let catalog_manager = RemoteCatalogManager::new(engine_manager, node_id, backend.clone());
        catalog_manager.start().await.unwrap();
        (backend, table_engine, Arc::new(catalog_manager))
    }
@@ -131,6 +135,7 @@ mod tests {
                primary_key_indices: vec![],
                create_if_not_exists: false,
                table_options: Default::default(),
+                engine: MITO_ENGINE.to_string(),
            },
        )
        .await
@@ -168,7 +173,6 @@ mod tests {
            .schema(DEFAULT_SCHEMA_NAME)
            .unwrap()
            .unwrap();
-        assert_eq!(vec!["numbers"], default_schema.table_names().unwrap());

        // register a new table with a nonexistent catalog
        let catalog_name = DEFAULT_CATALOG_NAME.to_string();
@@ -191,6 +195,7 @@ mod tests {
                primary_key_indices: vec![],
                create_if_not_exists: false,
                table_options: Default::default(),
+                engine: MITO_ENGINE.to_string(),
            },
        )
        .await
@@ -203,14 +208,7 @@ mod tests {
            table,
        };
        assert!(catalog_manager.register_table(reg_req).await.unwrap());
-        assert_eq!(
-            HashSet::from([table_name, "numbers".to_string()]),
-            default_schema
-                .table_names()
-                .unwrap()
-                .into_iter()
-                .collect::<HashSet<_>>()
-        );
+        assert_eq!(vec![table_name], default_schema.table_names().unwrap());
    }

    #[tokio::test]
@@ -251,6 +249,7 @@ mod tests {
                primary_key_indices: vec![],
                create_if_not_exists: false,
                table_options: Default::default(),
+                engine: MITO_ENGINE.to_string(),
            },
        )
        .await
@@ -14,7 +14,7 @@

use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, TableId};
use client::{Client, Database};
-use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
use prost::Message;
use substrait_proto::proto::plan_rel::RelType as PlanRelType;
use substrait_proto::proto::read_rel::{NamedTable, ReadType};
@@ -64,6 +64,7 @@ async fn run() {
        table_options: Default::default(),
        table_id: Some(TableId { id: 1024 }),
        region_ids: vec![0],
+        engine: MITO_ENGINE.to_string(),
    };

    let db = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
@@ -15,6 +15,8 @@
use std::sync::Arc;

use api::v1::greptime_database_client::GreptimeDatabaseClient;
+use api::v1::health_check_client::HealthCheckClient;
+use api::v1::HealthCheckRequest;
use arrow_flight::flight_service_client::FlightServiceClient;
use common_grpc::channel_manager::ChannelManager;
use parking_lot::RwLock;
@@ -153,6 +155,13 @@ impl Client {
            inner: GreptimeDatabaseClient::new(channel),
        })
    }

+    pub async fn health_check(&self) -> Result<()> {
+        let (_, channel) = self.find_channel()?;
+        let mut client = HealthCheckClient::new(channel);
+        client.health_check(HealthCheckRequest {}).await?;
+        Ok(())
+    }
}

#[cfg(test)]
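The new `health_check` rides on the same channel plumbing as the data path, so a caller can probe liveness before issuing real requests. A hypothetical usage sketch (it assumes the client crate's `with_urls` constructor and a server listening on the default gRPC port):

use client::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Client::with_urls(vec!["127.0.0.1:4001"]);
    client.health_check().await?;
    println!("server is up");
    Ok(())
}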
@@ -35,25 +35,44 @@ use crate::error::{
};
use crate::{error, Client, Result};

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
pub struct Database {
    // The "catalog" and "schema" to be used in processing the requests at the server side.
    // They are the "hint" or "context", just like how the "database" in "USE" statement is treated in MySQL.
    // They will be carried in the request header.
    catalog: String,
    schema: String,
+    // The dbname follows the same naming rule as our MySQL, PostgreSQL and HTTP
+    // protocols. The server treats dbname in priority of catalog/schema.
+    dbname: String,

    client: Client,
    ctx: FlightContext,
}

impl Database {
+    /// Create database service client using catalog and schema
    pub fn new(catalog: impl Into<String>, schema: impl Into<String>, client: Client) -> Self {
        Self {
            catalog: catalog.into(),
            schema: schema.into(),
            client,
-            ctx: FlightContext::default(),
+            ..Default::default()
        }
    }

+    /// Create database service client using dbname.
+    ///
+    /// This API is designed for external usage. `dbname` is:
+    ///
+    /// - the name of database when using GreptimeDB standalone or cluster
+    /// - the name provided by GreptimeCloud or other multi-tenant GreptimeDB
+    ///   environment
+    pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
+        Self {
+            dbname: dbname.into(),
+            client,
+            ..Default::default()
+        }
+    }
@@ -73,6 +92,14 @@ impl Database {
        self.schema = schema.into();
    }

+    pub fn dbname(&self) -> &String {
+        &self.dbname
+    }
+
+    pub fn set_dbname(&mut self, dbname: impl Into<String>) {
+        self.dbname = dbname.into();
+    }
+
    pub fn set_auth(&mut self, auth: AuthScheme) {
        self.ctx.auth_header = Some(AuthHeader {
            auth_scheme: Some(auth),
@@ -86,6 +113,7 @@ impl Database {
                catalog: self.catalog.clone(),
                schema: self.schema.clone(),
                authorization: self.ctx.auth_header.clone(),
+                dbname: self.dbname.clone(),
            }),
            request: Some(Request::Insert(request)),
        };
@@ -167,6 +195,7 @@ impl Database {
                catalog: self.catalog.clone(),
                schema: self.schema.clone(),
                authorization: self.ctx.auth_header.clone(),
+                dbname: self.dbname.clone(),
            }),
            request: Some(request),
        };
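With the `Default` derive in place, the two constructors stay small: each fills only its own fields and leaves the rest to `..Default::default()`. A hypothetical sketch of choosing between them (again assuming `with_urls`; no server is contacted until a request is made):

use client::{Client, Database};

fn main() {
    let client = Client::with_urls(vec!["127.0.0.1:4001"]);
    // Catalog/schema hints, as before:
    let _by_schema = Database::new("greptime", "public", client.clone());
    // Or the single-name form for multi-tenant deployments:
    let _by_name = Database::new_with_dbname("my_db", client);
}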
@@ -16,16 +16,14 @@ use std::any::Any;
use std::str::FromStr;

use common_error::prelude::*;
+use snafu::Location;
use tonic::{Code, Status};

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
    #[snafu(display("Illegal Flight messages, reason: {}", reason))]
-    IllegalFlightMessages {
-        reason: String,
-        backtrace: Backtrace,
-    },
+    IllegalFlightMessages { reason: String, location: Location },

    #[snafu(display("Failed to do Flight get, code: {}, source: {}", tonic_code, source))]
    FlightGet {
@@ -47,13 +45,10 @@ pub enum Error {
    },

    #[snafu(display("Illegal GRPC client state: {}", err_msg))]
-    IllegalGrpcClientState {
-        err_msg: String,
-        backtrace: Backtrace,
-    },
+    IllegalGrpcClientState { err_msg: String, location: Location },

    #[snafu(display("Missing required field in protobuf, field: {}", field))]
-    MissingField { field: String, backtrace: Backtrace },
+    MissingField { field: String, location: Location },

    #[snafu(display(
        "Failed to create gRPC channel, peer address: {}, source: {}",
@@ -93,10 +88,6 @@ impl ErrorExt for Error {
        }
    }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
    fn as_any(&self) -> &dyn Any {
        self
    }
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

+#![doc = include_str!("../../../../README.md")]
+
use std::fmt;

use clap::Parser;
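`#![doc = include_str!(..)]` embeds the repository README as this binary crate's rustdoc front page, so the docs and the README cannot drift apart. The general shape, with a path relative to the source file:

// Reuse a Markdown file verbatim as the crate-level documentation.
#![doc = include_str!("../README.md")]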
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

+use std::time::Duration;
+
use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{
@@ -86,6 +88,10 @@ struct StartCommand {
    wal_dir: Option<String>,
    #[clap(long)]
    procedure_dir: Option<String>,
+    #[clap(long)]
+    http_addr: Option<String>,
+    #[clap(long)]
+    http_timeout: Option<u64>,
}

impl StartCommand {
@@ -146,7 +152,7 @@ impl TryFrom<StartCommand> for DatanodeOptions {
        }

        if let Some(data_dir) = cmd.data_dir {
-            opts.storage = ObjectStoreConfig::File(FileConfig { data_dir });
+            opts.storage.store = ObjectStoreConfig::File(FileConfig { data_dir });
        }

        if let Some(wal_dir) = cmd.wal_dir {
@@ -155,6 +161,15 @@ impl TryFrom<StartCommand> for DatanodeOptions {
        if let Some(procedure_dir) = cmd.procedure_dir {
            opts.procedure = Some(ProcedureConfig::from_file_path(procedure_dir));
        }
+        if let Some(http_addr) = cmd.http_addr {
+            opts.http_opts.addr = http_addr
+        }
+        if let Some(http_timeout) = cmd.http_timeout {
+            opts.http_opts.timeout = Duration::from_secs(http_timeout)
+        }
+
+        // Disable dashboard in datanode.
+        opts.http_opts.disable_dashboard = true;

        Ok(opts)
    }
@@ -166,8 +181,9 @@ mod tests {
    use std::io::Write;
+    use std::time::Duration;

+    use common_base::readable_size::ReadableSize;
    use common_test_util::temp_dir::create_named_temp_file;
-    use datanode::datanode::{CompactionConfig, ObjectStoreConfig};
+    use datanode::datanode::{CompactionConfig, ObjectStoreConfig, RegionManifestConfig};
    use servers::Mode;

    use super::*;
@@ -203,10 +219,15 @@ mod tests {
            type = "File"
            data_dir = "/tmp/greptimedb/data/"

-            [compaction]
-            max_inflight_tasks = 4
-            max_files_in_level0 = 8
+            [storage.compaction]
+            max_inflight_tasks = 3
+            max_files_in_level0 = 7
            max_purge_tasks = 32
+
+            [storage.manifest]
+            checkpoint_margin = 9
+            gc_duration = '7s'
+            checkpoint_on_startup = true
        "#;
        write!(file, "{}", toml_str).unwrap();

@@ -237,9 +258,9 @@ mod tests {
        assert_eq!(3000, timeout_millis);
        assert!(tcp_nodelay);

-        match options.storage {
-            ObjectStoreConfig::File(FileConfig { data_dir }) => {
-                assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
+        match &options.storage.store {
+            ObjectStoreConfig::File(FileConfig { data_dir, .. }) => {
+                assert_eq!("/tmp/greptimedb/data/", data_dir)
            }
            ObjectStoreConfig::S3 { .. } => unreachable!(),
            ObjectStoreConfig::Oss { .. } => unreachable!(),
@@ -247,11 +268,20 @@ mod tests {

        assert_eq!(
            CompactionConfig {
-                max_inflight_tasks: 4,
-                max_files_in_level0: 8,
+                max_inflight_tasks: 3,
+                max_files_in_level0: 7,
                max_purge_tasks: 32,
                sst_write_buffer_size: ReadableSize::mb(8),
            },
-            options.compaction
+            options.storage.compaction,
        );
+        assert_eq!(
+            RegionManifestConfig {
+                checkpoint_margin: Some(9),
+                gc_duration: Some(Duration::from_secs(7)),
+                checkpoint_on_startup: true,
+            },
+            options.storage.manifest,
+        );
    }
@@ -16,6 +16,7 @@ use std::any::Any;
|
||||
|
||||
use common_error::prelude::*;
|
||||
use rustyline::error::ReadlineError;
|
||||
use snafu::Location;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
@@ -66,20 +67,20 @@ pub enum Error {
     ReadConfig {
         path: String,
         source: std::io::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to parse config, source: {}", source))]
     ParseConfig {
         source: toml::de::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Missing config, msg: {}", msg))]
-    MissingConfig { msg: String, backtrace: Backtrace },
+    MissingConfig { msg: String, location: Location },

     #[snafu(display("Illegal config: {}", msg))]
-    IllegalConfig { msg: String, backtrace: Backtrace },
+    IllegalConfig { msg: String, location: Location },

     #[snafu(display("Illegal auth config: {}", source))]
     IllegalAuthConfig {
@@ -100,13 +101,13 @@ pub enum Error {
     #[snafu(display("Cannot create REPL: {}", source))]
     ReplCreation {
         source: ReadlineError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Error reading command: {}", source))]
     Readline {
         source: ReadlineError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to request database, sql: {sql}, source: {source}"))]
@@ -187,78 +188,7 @@ impl ErrorExt for Error {
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    type StdResult<E> = std::result::Result<(), E>;
-
-    #[test]
-    fn test_start_node_error() {
-        fn throw_datanode_error() -> StdResult<datanode::error::Error> {
-            datanode::error::MissingNodeIdSnafu {}.fail()
-        }
-
-        let e = throw_datanode_error()
-            .context(StartDatanodeSnafu)
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
-    }
-
-    #[test]
-    fn test_start_frontend_error() {
-        fn throw_frontend_error() -> StdResult<frontend::error::Error> {
-            frontend::error::InvalidSqlSnafu { err_msg: "failed" }.fail()
-        }
-
-        let e = throw_frontend_error()
-            .context(StartFrontendSnafu)
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
-    }
-
-    #[test]
-    fn test_start_metasrv_error() {
-        fn throw_metasrv_error() -> StdResult<meta_srv::error::Error> {
-            meta_srv::error::StreamNoneSnafu {}.fail()
-        }
-
-        let e = throw_metasrv_error()
-            .context(StartMetaServerSnafu)
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::Internal);
-    }
-
-    #[test]
-    fn test_read_config_error() {
-        fn throw_read_config_error() -> StdResult<std::io::Error> {
-            Err(std::io::ErrorKind::NotFound.into())
-        }
-
-        let e = throw_read_config_error()
-            .context(ReadConfigSnafu { path: "test" })
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
-    }
-}
@@ -107,6 +107,8 @@ pub struct StartCommand {
     tls_key_path: Option<String>,
     #[clap(long)]
     user_provider: Option<String>,
+    #[clap(long)]
+    disable_dashboard: bool,
 }

 impl StartCommand {
@@ -149,18 +151,24 @@ impl TryFrom<StartCommand> for FrontendOptions {

         let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);

+        let mut http_options = HttpOptions {
+            disable_dashboard: cmd.disable_dashboard,
+            ..Default::default()
+        };
+
         if let Some(addr) = cmd.http_addr {
-            opts.http_options = Some(HttpOptions {
-                addr,
-                ..Default::default()
-            });
+            http_options.addr = addr;
         }

+        opts.http_options = Some(http_options);
+
         if let Some(addr) = cmd.grpc_addr {
             opts.grpc_options = Some(GrpcOptions {
                 addr,
                 ..Default::default()
             });
         }

         if let Some(addr) = cmd.mysql_addr {
             opts.mysql_options = Some(MysqlOptions {
                 addr,
@@ -227,6 +235,7 @@ mod tests {
             tls_cert_path: None,
             tls_key_path: None,
             user_provider: None,
+            disable_dashboard: false,
         };

         let opts: FrontendOptions = command.try_into().unwrap();
@@ -289,6 +298,7 @@ mod tests {
             tls_cert_path: None,
             tls_key_path: None,
             user_provider: None,
+            disable_dashboard: false,
         };

         let fe_opts = FrontendOptions::try_from(command).unwrap();
@@ -319,6 +329,7 @@ mod tests {
             tls_cert_path: None,
             tls_key_path: None,
             user_provider: Some("static_user_provider:cmd:test=test".to_string()),
+            disable_dashboard: false,
         };

         let plugins = load_frontend_plugins(&command.user_provider);
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::time::Duration;
+
 use clap::Parser;
 use common_telemetry::{info, logging, warn};
 use meta_srv::bootstrap::MetaSrvInstance;
@@ -80,6 +82,10 @@ struct StartCommand {
     selector: Option<String>,
     #[clap(long)]
     use_memory_store: bool,
+    #[clap(long)]
+    http_addr: Option<String>,
+    #[clap(long)]
+    http_timeout: Option<u64>,
 }

 impl StartCommand {
@@ -128,6 +134,16 @@ impl TryFrom<StartCommand> for MetaSrvOptions {
             opts.use_memory_store = true;
         }

+        if let Some(http_addr) = cmd.http_addr {
+            opts.http_opts.addr = http_addr;
+        }
+        if let Some(http_timeout) = cmd.http_timeout {
+            opts.http_opts.timeout = Duration::from_secs(http_timeout);
+        }
+
+        // Disable dashboard in metasrv.
+        opts.http_opts.disable_dashboard = true;
+
         Ok(opts)
     }
 }
@@ -150,6 +166,8 @@ mod tests {
             config_file: None,
             selector: Some("LoadBased".to_string()),
             use_memory_store: false,
+            http_addr: None,
+            http_timeout: None,
         };
         let options: MetaSrvOptions = cmd.try_into().unwrap();
         assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
@@ -178,6 +196,8 @@ mod tests {
             selector: None,
             config_file: Some(file.path().to_str().unwrap().to_string()),
             use_memory_store: false,
+            http_addr: None,
+            http_timeout: None,
         };
         let options: MetaSrvOptions = cmd.try_into().unwrap();
         assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
@@ -17,9 +17,7 @@ use std::sync::Arc;
 use clap::Parser;
 use common_base::Plugins;
 use common_telemetry::info;
-use datanode::datanode::{
-    CompactionConfig, Datanode, DatanodeOptions, ObjectStoreConfig, ProcedureConfig, WalConfig,
-};
+use datanode::datanode::{Datanode, DatanodeOptions, ProcedureConfig, StorageConfig, WalConfig};
 use datanode::instance::InstanceRef;
 use frontend::frontend::FrontendOptions;
 use frontend::grpc::GrpcOptions;
@@ -82,8 +80,7 @@ pub struct StandaloneOptions {
     pub prometheus_options: Option<PrometheusOptions>,
     pub prom_options: Option<PromOptions>,
     pub wal: WalConfig,
-    pub storage: ObjectStoreConfig,
-    pub compaction: CompactionConfig,
+    pub storage: StorageConfig,
     pub procedure: Option<ProcedureConfig>,
 }

@@ -101,8 +98,7 @@ impl Default for StandaloneOptions {
             prometheus_options: Some(PrometheusOptions::default()),
             prom_options: Some(PromOptions::default()),
             wal: WalConfig::default(),
-            storage: ObjectStoreConfig::default(),
-            compaction: CompactionConfig::default(),
+            storage: StorageConfig::default(),
             procedure: None,
         }
     }
@@ -129,7 +125,6 @@ impl StandaloneOptions {
             enable_memory_catalog: self.enable_memory_catalog,
             wal: self.wal,
             storage: self.storage,
-            compaction: self.compaction,
             procedure: self.procedure,
             ..Default::default()
         }
@@ -241,8 +236,9 @@ async fn build_frontend(
     plugins: Arc<Plugins>,
     datanode_instance: InstanceRef,
 ) -> Result<FeInstance> {
-    let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
-    frontend_instance.set_script_handler(datanode_instance);
+    let mut frontend_instance = FeInstance::try_new_standalone(datanode_instance.clone())
+        .await
+        .context(StartFrontendSnafu)?;
     frontend_instance.set_plugins(plugins.clone());
     Ok(frontend_instance)
 }

@@ -18,7 +18,7 @@ use std::io::{Read, Write};
 use bytes::{Buf, BufMut, BytesMut};
 use common_error::prelude::ErrorExt;
 use paste::paste;
-use snafu::{ensure, Backtrace, ErrorCompat, ResultExt, Snafu};
+use snafu::{ensure, Location, ResultExt, Snafu};

 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
@@ -31,29 +31,33 @@ pub enum Error {
     Overflow {
         src_len: usize,
         dst_len: usize,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Buffer underflow"))]
-    Underflow { backtrace: Backtrace },
+    Underflow { location: Location },

     #[snafu(display("IO operation reach EOF, source: {}", source))]
     Eof {
         source: std::io::Error,
-        backtrace: Backtrace,
+        location: Location,
     },
 }

 pub type Result<T> = std::result::Result<T, Error>;

 impl ErrorExt for Error {
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
+
+    fn location_opt(&self) -> Option<common_error::snafu::Location> {
+        match self {
+            Error::Overflow { location, .. } => Some(*location),
+            Error::Underflow { location, .. } => Some(*location),
+            Error::Eof { location, .. } => Some(*location),
+        }
+    }
 }

 macro_rules! impl_read_le {

@@ -53,6 +53,10 @@ impl ReadableSize {
     pub const fn as_mb(self) -> u64 {
         self.0 / MIB
     }
+
+    pub const fn as_bytes(self) -> u64 {
+        self.0
+    }
 }

 impl Div<u64> for ReadableSize {

@@ -25,3 +25,6 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
 pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
 /// scripts table id
 pub const SCRIPTS_TABLE_ID: u32 = 1;
+
+pub const MITO_ENGINE: &str = "mito";
+pub const IMMUTABLE_FILE_ENGINE: &str = "file";

@@ -16,29 +16,29 @@ use std::any::Any;

 use common_error::ext::ErrorExt;
 use common_error::prelude::{Snafu, StatusCode};
-use snafu::{Backtrace, ErrorCompat};
+use snafu::Location;

 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
 pub enum Error {
     #[snafu(display("Invalid catalog info: {}", key))]
-    InvalidCatalog { key: String, backtrace: Backtrace },
+    InvalidCatalog { key: String, location: Location },

     #[snafu(display("Failed to deserialize catalog entry value: {}", raw))]
     DeserializeCatalogEntryValue {
         raw: String,
-        backtrace: Backtrace,
+        location: Location,
         source: serde_json::error::Error,
     },

     #[snafu(display("Failed to serialize catalog entry value"))]
     SerializeCatalogEntryValue {
-        backtrace: Backtrace,
+        location: Location,
         source: serde_json::error::Error,
     },

     #[snafu(display("Failed to parse node id: {}", key))]
-    ParseNodeId { key: String, backtrace: Backtrace },
+    ParseNodeId { key: String, location: Location },
 }

 impl ErrorExt for Error {
@@ -51,10 +51,6 @@ impl ErrorExt for Error {
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -5,9 +5,24 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
 arrow.workspace = true
 arrow-schema.workspace = true
 async-compression = { version = "0.3", features = [
     "bzip2",
     "gzip",
     "xz",
     "zstd",
     "futures-io",
     "tokio",
 ] }
 async-trait.workspace = true
 common-error = { path = "../error" }
 common-runtime = { path = "../runtime" }
 datafusion.workspace = true
 futures.workspace = true
 object-store = { path = "../../object-store" }
 regex = "1.7"
 snafu.workspace = true
 tokio.workspace = true
 tokio-util.workspace = true
 url = "2.3"

src/common/datasource/src/compression.rs (new file, 84 lines)
@@ -0,0 +1,84 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::Display;
use std::str::FromStr;

use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder};
use tokio::io::{AsyncRead, BufReader};

use crate::error::{self, Error, Result};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionType {
    /// Gzip-ed file
    GZIP,
    /// Bzip2-ed file
    BZIP2,
    /// Xz-ed file (liblzma)
    XZ,
    /// Zstd-ed file,
    ZSTD,
    /// Uncompressed file
    UNCOMPRESSED,
}

impl FromStr for CompressionType {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self> {
        let s = s.to_uppercase();
        match s.as_str() {
            "GZIP" | "GZ" => Ok(Self::GZIP),
            "BZIP2" | "BZ2" => Ok(Self::BZIP2),
            "XZ" => Ok(Self::XZ),
            "ZST" | "ZSTD" => Ok(Self::ZSTD),
            "" => Ok(Self::UNCOMPRESSED),
            _ => error::UnsupportedCompressionTypeSnafu {
                compression_type: s,
            }
            .fail(),
        }
    }
}

impl Display for CompressionType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::GZIP => "GZIP",
            Self::BZIP2 => "BZIP2",
            Self::XZ => "XZ",
            Self::ZSTD => "ZSTD",
            Self::UNCOMPRESSED => "",
        })
    }
}

impl CompressionType {
    pub const fn is_compressed(&self) -> bool {
        !matches!(self, &Self::UNCOMPRESSED)
    }

    pub fn convert_async_read<T: AsyncRead + Unpin + Send + 'static>(
        &self,
        s: T,
    ) -> Box<dyn AsyncRead + Unpin + Send> {
        match self {
            CompressionType::GZIP => Box::new(GzipDecoder::new(BufReader::new(s))),
            CompressionType::BZIP2 => Box::new(BzDecoder::new(BufReader::new(s))),
            CompressionType::XZ => Box::new(XzDecoder::new(BufReader::new(s))),
            CompressionType::ZSTD => Box::new(ZstdDecoder::new(BufReader::new(s))),
            CompressionType::UNCOMPRESSED => Box::new(s),
        }
    }
}
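A minimal round-trip sketch of the `CompressionType` API introduced above (not part of the diff; it uses `async_compression`'s `GzipEncoder` purely to produce compressed bytes for illustration, and assumes the crate is importable as `common_datasource`):

```rust
use std::str::FromStr;

use async_compression::tokio::bufread::GzipEncoder;
use common_datasource::compression::CompressionType;
use tokio::io::{AsyncReadExt, BufReader};

#[tokio::main]
async fn main() {
    // Produce some gzip-compressed bytes in memory.
    let mut encoder = GzipEncoder::new(BufReader::new(&b"hello world"[..]));
    let mut compressed = Vec::new();
    encoder.read_to_end(&mut compressed).await.unwrap();

    // "gz" and "GZIP" both parse to GZIP; the empty string means UNCOMPRESSED.
    let ct = CompressionType::from_str("gz").unwrap();
    assert!(ct.is_compressed());

    // convert_async_read wraps any AsyncRead in the matching decoder.
    let mut reader = ct.convert_async_read(std::io::Cursor::new(compressed));
    let mut out = Vec::new();
    reader.read_to_end(&mut out).await.unwrap();
    assert_eq!(out, b"hello world");
}
```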
@@ -15,11 +15,15 @@
 use std::any::Any;

 use common_error::prelude::*;
+use snafu::Location;
 use url::ParseError;

 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
 pub enum Error {
+    #[snafu(display("Unsupported compression type: {}", compression_type))]
+    UnsupportedCompressionType { compression_type: String },
+
     #[snafu(display("Unsupported backend protocol: {}", protocol))]
     UnsupportedBackendProtocol { protocol: String },

@@ -32,21 +36,59 @@ pub enum Error {
     #[snafu(display("Invalid url: {}, error :{}", url, source))]
     InvalidUrl { url: String, source: ParseError },

+    #[snafu(display("Failed to decompression, source: {}", source))]
+    Decompression {
+        source: object_store::Error,
+        location: Location,
+    },
+
     #[snafu(display("Failed to build backend, source: {}", source))]
     BuildBackend {
         source: object_store::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

+    #[snafu(display("Failed to read object from path: {}, source: {}", path, source))]
+    ReadObject {
+        path: String,
+        location: Location,
+        source: object_store::Error,
+    },
+
+    #[snafu(display("Failed to read parquet source: {}", source))]
+    ReadParquetSnafu {
+        location: Location,
+        source: datafusion::parquet::errors::ParquetError,
+    },
+
+    #[snafu(display("Failed to convert parquet to schema: {}", source))]
+    ParquetToSchema {
+        location: Location,
+        source: datafusion::parquet::errors::ParquetError,
+    },
+
+    #[snafu(display("Failed to infer schema from file: {}, source: {}", path, source))]
+    InferSchema {
+        path: String,
+        location: Location,
+        source: arrow_schema::ArrowError,
+    },
+
     #[snafu(display("Failed to list object in path: {}, source: {}", path, source))]
     ListObjects {
         path: String,
-        backtrace: Backtrace,
+        location: Location,
         source: object_store::Error,
     },

     #[snafu(display("Invalid connection: {}", msg))]
     InvalidConnection { msg: String },

+    #[snafu(display("Failed to join handle: {}", source))]
+    JoinHandle {
+        location: Location,
+        source: tokio::task::JoinError,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -55,21 +97,46 @@ impl ErrorExt for Error {
     fn status_code(&self) -> StatusCode {
         use Error::*;
         match self {
-            BuildBackend { .. } | ListObjects { .. } => StatusCode::StorageUnavailable,
+            BuildBackend { .. } | ListObjects { .. } | ReadObject { .. } => {
+                StatusCode::StorageUnavailable
+            }

             UnsupportedBackendProtocol { .. }
+            | UnsupportedCompressionType { .. }
             | InvalidConnection { .. }
             | InvalidUrl { .. }
             | EmptyHostPath { .. }
-            | InvalidPath { .. } => StatusCode::InvalidArguments,
-        }
-    }
+            | InvalidPath { .. }
+            | InferSchema { .. }
+            | ReadParquetSnafu { .. }
+            | ParquetToSchema { .. } => StatusCode::InvalidArguments,

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
+            Decompression { .. } | JoinHandle { .. } => StatusCode::Unexpected,
+        }
     }

     fn as_any(&self) -> &dyn Any {
         self
     }
+
+    fn location_opt(&self) -> Option<common_error::snafu::Location> {
+        use Error::*;
+        match self {
+            BuildBackend { location, .. } => Some(*location),
+            ReadObject { location, .. } => Some(*location),
+            ListObjects { location, .. } => Some(*location),
+            InferSchema { location, .. } => Some(*location),
+            ReadParquetSnafu { location, .. } => Some(*location),
+            ParquetToSchema { location, .. } => Some(*location),
+            Decompression { location, .. } => Some(*location),
+            JoinHandle { location, .. } => Some(*location),
+
+            UnsupportedBackendProtocol { .. }
+            | EmptyHostPath { .. }
+            | InvalidPath { .. }
+            | InvalidUrl { .. }
+            | InvalidConnection { .. }
+            | UnsupportedCompressionType { .. } => None,
+        }
+    }
 }
src/common/datasource/src/file_format.rs (new file, 30 lines)
@@ -0,0 +1,30 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod csv;
pub mod json;
pub mod parquet;

pub const DEFAULT_SCHEMA_INFER_MAX_RECORD: usize = 1000;

use arrow::datatypes::SchemaRef;
use async_trait::async_trait;
use object_store::ObjectStore;

use crate::error::Result;

#[async_trait]
pub trait FileFormat: Send + Sync + std::fmt::Debug {
    async fn infer_schema(&self, store: &ObjectStore, path: String) -> Result<SchemaRef>;
}
src/common/datasource/src/file_format/csv.rs (new file, 158 lines)
@@ -0,0 +1,158 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::csv::reader::infer_reader_schema as infer_csv_schema;
use arrow_schema::SchemaRef;
use async_trait::async_trait;
use common_runtime;
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::io::SyncIoBridge;

use crate::compression::CompressionType;
use crate::error::{self, Result};
use crate::file_format::{self, FileFormat};

#[derive(Debug)]
pub struct CsvFormat {
    pub has_header: bool,
    pub delimiter: u8,
    pub schema_infer_max_record: Option<usize>,
    pub compression_type: CompressionType,
}

impl Default for CsvFormat {
    fn default() -> Self {
        Self {
            has_header: true,
            delimiter: b',',
            schema_infer_max_record: Some(file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD),
            compression_type: CompressionType::UNCOMPRESSED,
        }
    }
}

#[async_trait]
impl FileFormat for CsvFormat {
    async fn infer_schema(&self, store: &ObjectStore, path: String) -> Result<SchemaRef> {
        let reader = store
            .reader(&path)
            .await
            .context(error::ReadObjectSnafu { path: &path })?;

        let decoded = self.compression_type.convert_async_read(reader);

        let delimiter = self.delimiter;
        let schema_infer_max_record = self.schema_infer_max_record;
        let has_header = self.has_header;

        common_runtime::spawn_blocking_read(move || {
            let reader = SyncIoBridge::new(decoded);

            let (schema, _records_read) =
                infer_csv_schema(reader, delimiter, schema_infer_max_record, has_header)
                    .context(error::InferSchemaSnafu { path: &path })?;

            Ok(Arc::new(schema))
        })
        .await
        .context(error::JoinHandleSnafu)?
    }
}

#[cfg(test)]
mod tests {

    use super::*;
    use crate::file_format::FileFormat;
    use crate::test_util::{self, format_schema, test_store};

    fn test_data_root() -> String {
        test_util::get_data_dir("tests/csv").display().to_string()
    }

    #[tokio::test]
    async fn infer_schema_basic() {
        let csv = CsvFormat::default();
        let store = test_store(&test_data_root());
        let schema = csv
            .infer_schema(&store, "simple.csv".to_string())
            .await
            .unwrap();
        let formatted: Vec<_> = format_schema(schema);

        assert_eq!(
            vec![
                "c1: Utf8: NULL",
                "c2: Int64: NULL",
                "c3: Int64: NULL",
                "c4: Int64: NULL",
                "c5: Int64: NULL",
                "c6: Int64: NULL",
                "c7: Int64: NULL",
                "c8: Int64: NULL",
                "c9: Int64: NULL",
                "c10: Int64: NULL",
                "c11: Float64: NULL",
                "c12: Float64: NULL",
                "c13: Utf8: NULL"
            ],
            formatted,
        );
    }

    #[tokio::test]
    async fn infer_schema_with_limit() {
        let json = CsvFormat {
            schema_infer_max_record: Some(3),
            ..CsvFormat::default()
        };
        let store = test_store(&test_data_root());
        let schema = json
            .infer_schema(&store, "schema_infer_limit.csv".to_string())
            .await
            .unwrap();
        let formatted: Vec<_> = format_schema(schema);

        assert_eq!(
            vec![
                "a: Int64: NULL",
                "b: Float64: NULL",
                "c: Int64: NULL",
                "d: Int64: NULL"
            ],
            formatted
        );

        let json = CsvFormat::default();
        let store = test_store(&test_data_root());
        let schema = json
            .infer_schema(&store, "schema_infer_limit.csv".to_string())
            .await
            .unwrap();
        let formatted: Vec<_> = format_schema(schema);

        assert_eq!(
            vec![
                "a: Int64: NULL",
                "b: Float64: NULL",
                "c: Int64: NULL",
                "d: Utf8: NULL"
            ],
            formatted
        );
    }
}
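A short usage sketch tying the pieces above together — `build_fs_backend`, `CsvFormat`, and dyn-dispatch through the `FileFormat` trait (not part of the diff; the `/tmp/data` path is illustrative, and the module paths are assumptions based on the crate layout shown in this change):

```rust
use common_datasource::file_format::csv::CsvFormat;
use common_datasource::file_format::FileFormat;
use common_datasource::object_store::fs::build_fs_backend;

#[tokio::main]
async fn main() {
    // Back the object store with a local directory, then infer the CSV
    // schema through the same trait object the higher layers would use.
    let store = build_fs_backend("/tmp/data").unwrap();
    let format: Box<dyn FileFormat> = Box::new(CsvFormat::default());
    let schema = format
        .infer_schema(&store, "simple.csv".to_string())
        .await
        .unwrap();
    println!("{:?}", schema);
}
```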
src/common/datasource/src/file_format/json.rs (new file, 121 lines)
@@ -0,0 +1,121 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::io::BufReader;
use std::sync::Arc;

use arrow::datatypes::SchemaRef;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use async_trait::async_trait;
use common_runtime;
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::io::SyncIoBridge;

use crate::compression::CompressionType;
use crate::error::{self, Result};
use crate::file_format::{self, FileFormat};

#[derive(Debug)]
pub struct JsonFormat {
    pub schema_infer_max_record: Option<usize>,
    pub compression_type: CompressionType,
}

impl Default for JsonFormat {
    fn default() -> Self {
        Self {
            schema_infer_max_record: Some(file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD),
            compression_type: CompressionType::UNCOMPRESSED,
        }
    }
}

#[async_trait]
impl FileFormat for JsonFormat {
    async fn infer_schema(&self, store: &ObjectStore, path: String) -> Result<SchemaRef> {
        let reader = store
            .reader(&path)
            .await
            .context(error::ReadObjectSnafu { path: &path })?;

        let decoded = self.compression_type.convert_async_read(reader);

        let schema_infer_max_record = self.schema_infer_max_record;

        common_runtime::spawn_blocking_read(move || {
            let mut reader = BufReader::new(SyncIoBridge::new(decoded));

            let iter = ValueIter::new(&mut reader, schema_infer_max_record);

            let schema = infer_json_schema_from_iterator(iter)
                .context(error::InferSchemaSnafu { path: &path })?;

            Ok(Arc::new(schema))
        })
        .await
        .context(error::JoinHandleSnafu)?
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::file_format::FileFormat;
    use crate::test_util::{self, format_schema, test_store};

    fn test_data_root() -> String {
        test_util::get_data_dir("tests/json").display().to_string()
    }

    #[tokio::test]
    async fn infer_schema_basic() {
        let json = JsonFormat::default();
        let store = test_store(&test_data_root());
        let schema = json
            .infer_schema(&store, "simple.json".to_string())
            .await
            .unwrap();
        let formatted: Vec<_> = format_schema(schema);

        assert_eq!(
            vec![
                "a: Int64: NULL",
                "b: Float64: NULL",
                "c: Boolean: NULL",
                "d: Utf8: NULL",
            ],
            formatted
        );
    }

    #[tokio::test]
    async fn infer_schema_with_limit() {
        let json = JsonFormat {
            schema_infer_max_record: Some(3),
            ..JsonFormat::default()
        };
        let store = test_store(&test_data_root());
        let schema = json
            .infer_schema(&store, "schema_infer_limit.json".to_string())
            .await
            .unwrap();
        let formatted: Vec<_> = format_schema(schema);

        assert_eq!(
            vec!["a: Int64: NULL", "b: Float64: NULL", "c: Boolean: NULL"],
            formatted
        );
    }
}
src/common/datasource/src/file_format/parquet.rs (new file, 78 lines)
@@ -0,0 +1,78 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow_schema::SchemaRef;
use async_trait::async_trait;
use datafusion::parquet::arrow::async_reader::AsyncFileReader;
use datafusion::parquet::arrow::parquet_to_arrow_schema;
use object_store::ObjectStore;
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::file_format::FileFormat;

#[derive(Debug, Default)]
pub struct ParquetFormat {}

#[async_trait]
impl FileFormat for ParquetFormat {
    async fn infer_schema(&self, store: &ObjectStore, path: String) -> Result<SchemaRef> {
        let mut reader = store
            .reader(&path)
            .await
            .context(error::ReadObjectSnafu { path: &path })?;

        let metadata = reader
            .get_metadata()
            .await
            .context(error::ReadParquetSnafuSnafu)?;

        let file_metadata = metadata.file_metadata();
        let schema = parquet_to_arrow_schema(
            file_metadata.schema_descr(),
            file_metadata.key_value_metadata(),
        )
        .context(error::ParquetToSchemaSnafu)?;

        Ok(Arc::new(schema))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::file_format::FileFormat;
    use crate::test_util::{self, format_schema, test_store};

    fn test_data_root() -> String {
        test_util::get_data_dir("tests/parquet")
            .display()
            .to_string()
    }

    #[tokio::test]
    async fn infer_schema_basic() {
        let json = ParquetFormat::default();
        let store = test_store(&test_data_root());
        let schema = json
            .infer_schema(&store, "basic.parquet".to_string())
            .await
            .unwrap();
        let formatted: Vec<_> = format_schema(schema);

        assert_eq!(vec!["num: Int64: NULL", "str: Utf8: NULL"], formatted);
    }
}
@@ -12,7 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+pub mod compression;
 pub mod error;
+pub mod file_format;
 pub mod lister;
 pub mod object_store;
+pub mod test_util;
 pub mod util;

@@ -13,7 +13,7 @@
 // limitations under the License.

 use futures::{future, TryStreamExt};
-use object_store::{Object, ObjectStore};
+use object_store::{Entry, ObjectStore};
 use regex::Regex;
 use snafu::ResultExt;

@@ -46,13 +46,12 @@ impl Lister {
         }
     }

-    pub async fn list(&self) -> Result<Vec<Object>> {
+    pub async fn list(&self) -> Result<Vec<Entry>> {
         match &self.source {
             Source::Dir => {
                 let streamer = self
                     .object_store
-                    .object(&self.path)
-                    .list()
+                    .list(&self.path)
                     .await
                     .context(error::ListObjectsSnafu { path: &self.path })?;

@@ -70,11 +69,14 @@ impl Lister {
                     .context(error::ListObjectsSnafu { path: &self.path })
             }
             Source::Filename(filename) => {
-                let obj = self
-                    .object_store
-                    .object(&format!("{}{}", self.path, filename));
-
-                Ok(vec![obj])
+                // make sure this file exists
+                let file_full_path = format!("{}{}", self.path, filename);
+                let _ = self.object_store.stat(&file_full_path).await.context(
+                    error::ListObjectsSnafu {
+                        path: &file_full_path,
+                    },
+                )?;
+                Ok(vec![Entry::new(&file_full_path)])
             }
         }
     }

@@ -13,16 +13,16 @@
 // limitations under the License.

 use object_store::services::Fs;
-use object_store::{ObjectStore, ObjectStoreBuilder};
+use object_store::ObjectStore;
 use snafu::ResultExt;

-use crate::error::{self, Result};
+use crate::error::{BuildBackendSnafu, Result};

 pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
-    let accessor = Fs::default()
-        .root(root)
-        .build()
-        .context(error::BuildBackendSnafu)?;
-
-    Ok(ObjectStore::new(accessor).finish())
+    let mut builder = Fs::default();
+    builder.root(root);
+    let object_store = ObjectStore::new(builder)
+        .context(BuildBackendSnafu)?
+        .finish();
+    Ok(object_store)
 }

@@ -15,7 +15,7 @@
 use std::collections::HashMap;

 use object_store::services::S3;
-use object_store::{ObjectStore, ObjectStoreBuilder};
+use object_store::ObjectStore;
 use snafu::ResultExt;

 use crate::error::{self, Result};
@@ -73,7 +73,7 @@ pub fn build_s3_backend(
         }
     }

-    let accessor = builder.build().context(error::BuildBackendSnafu)?;
-
-    Ok(ObjectStore::new(accessor).finish())
+    Ok(ObjectStore::new(builder)
+        .context(error::BuildBackendSnafu)?
+        .finish())
 }

src/common/datasource/src/test_util.rs (new file, 48 lines)
@@ -0,0 +1,48 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::path::PathBuf;

use arrow_schema::SchemaRef;
use object_store::services::Fs;
use object_store::ObjectStore;

pub fn get_data_dir(path: &str) -> PathBuf {
    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
    let dir = env!("CARGO_MANIFEST_DIR");

    PathBuf::from(dir).join(path)
}

pub fn format_schema(schema: SchemaRef) -> Vec<String> {
    schema
        .fields()
        .iter()
        .map(|f| {
            format!(
                "{}: {:?}: {}",
                f.name(),
                f.data_type(),
                if f.is_nullable() { "NULL" } else { "NOT NULL" }
            )
        })
        .collect()
}

pub fn test_store(root: &str) -> ObjectStore {
    let mut builder = Fs::default();
    builder.root(root);

    ObjectStore::new(builder).unwrap().finish()
}
src/common/datasource/tests/README.md (new file, 24 lines)
@@ -0,0 +1,24 @@
### Parquet
The `parquet/basic.parquet` was converted from `csv/basic.csv` via [bdt](https://github.com/andygrove/bdt).

Contents of `parquet/basic.parquet`:

Data:
```
+-----+-------+
| num | str   |
+-----+-------+
| 5   | test  |
| 2   | hello |
| 4   | foo   |
+-----+-------+
```
Schema:
```
+-------------+-----------+-------------+
| column_name | data_type | is_nullable |
+-------------+-----------+-------------+
| num         | Int64     | YES         |
| str         | Utf8      | YES         |
+-------------+-----------+-------------+
```
src/common/datasource/tests/csv/basic.csv (new file, 4 lines)
@@ -0,0 +1,4 @@
num,str
5,test
2,hello
4,foo
src/common/datasource/tests/csv/schema_infer_limit.csv (new file, 5 lines)
@@ -0,0 +1,5 @@
a,b,c,d
1,2,3,4
1,2,3,4
1,2.0,3,4
1,2,4,test
src/common/datasource/tests/csv/simple.csv (new file, 11 lines)
@@ -0,0 +1,11 @@
c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13
c,2,1,18109,2033001162,-6513304855495910254,25,43062,1491205016,5863949479783605708,0.110830784,0.9294097332465232,6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW
d,5,-40,22614,706441268,-7542719935673075327,155,14337,3373581039,11720144131976083864,0.69632107,0.3114712539863804,C2GT5KVyOPZpgKVl110TyZO0NcJ434
b,1,29,-18218,994303988,5983957848665088916,204,9489,3275293996,14857091259186476033,0.53840446,0.17909035118828576,AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz
a,1,-85,-15154,1171968280,1919439543497968449,77,52286,774637006,12101411955859039553,0.12285209,0.6864391962767343,0keZ5G8BffGwgF2RwQD59TFzMStxCB
b,5,-82,22080,1824882165,7373730676428214987,208,34331,3342719438,3330177516592499461,0.82634634,0.40975383525297016,Ig1QcuKsjHXkproePdERo2w0mYzIqd
b,4,-111,-1967,-4229382,1892872227362838079,67,9832,1243785310,8382489916947120498,0.06563997,0.152498292971736,Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH
e,3,104,-25136,1738331255,300633854973581194,139,20807,3577318119,13079037564113702254,0.40154034,0.7764360990307122,DuJNG8tufSqW0ZstHqWj3aGvFLMg4A
a,3,13,12613,1299719633,2020498574254265315,191,17835,3998790955,14881411008939145569,0.041445434,0.8813167497816289,Amn2K87Db5Es3dFQO9cw9cvpAM6h35
d,1,38,18384,-335410409,-1632237090406591229,26,57510,2712615025,1842662804748246269,0.6064476,0.6404495093354053,4HX6feIvmNXBN7XGqgO4YVBkhu8GDI
a,4,-38,20744,762932956,308913475857409919,7,45465,1787652631,878137512938218976,0.7459874,0.02182578039211991,ydkwycaISlYSlEq3TlkS2m15I2pcp8
src/common/datasource/tests/json/schema_infer_limit.json (new file, 4 lines)
@@ -0,0 +1,4 @@
{"a":1}
{"a":-10, "b":-3.5}
{"a":2, "b":0.6, "c":false}
{"a":1, "b":2.0, "c":false, "d":"4"}
src/common/datasource/tests/json/simple.json (new file, 12 lines)
@@ -0,0 +1,12 @@
{"a":1, "b":2.0, "c":false, "d":"4"}
{"a":-10, "b":-3.5, "c":true, "d":"4"}
{"a":2, "b":0.6, "c":false, "d":"text"}
{"a":1, "b":2.0, "c":false, "d":"4"}
{"a":7, "b":-3.5, "c":true, "d":"4"}
{"a":1, "b":0.6, "c":false, "d":"text"}
{"a":1, "b":2.0, "c":false, "d":"4"}
{"a":5, "b":-3.5, "c":true, "d":"4"}
{"a":1, "b":0.6, "c":false, "d":"text"}
{"a":1, "b":2.0, "c":false, "d":"4"}
{"a":1, "b":-3.5, "c":true, "d":"4"}
{"a":100000000000000, "b":0.6, "c":false, "d":"text"}
src/common/datasource/tests/parquet/basic.parquet (new binary file, not shown)
@@ -23,10 +23,12 @@ pub trait ErrorExt: std::error::Error {
         StatusCode::Unknown
     }

-    /// Get the reference to the backtrace of this error, None if the backtrace is unavailable.
-    // Add `_opt` suffix to avoid confusing with similar method in `std::error::Error`, once backtrace
-    // in std is stable, we can deprecate this method.
-    fn backtrace_opt(&self) -> Option<&crate::snafu::Backtrace>;
+    // TODO(ruihang): remove this default implementation
+    /// Get the location of this error, None if the location is unavailable.
+    /// Add `_opt` suffix to avoid confusing with similar method in `std::error::Error`
+    fn location_opt(&self) -> Option<crate::snafu::Location> {
+        None
+    }

     /// Returns the error as [Any](std::any::Any) so that it can be
     /// downcast to a specific implementation.
@@ -71,8 +73,8 @@ impl crate::ext::ErrorExt for BoxedError {
         self.inner.status_code()
     }

-    fn backtrace_opt(&self) -> Option<&crate::snafu::Backtrace> {
-        self.inner.backtrace_opt()
+    fn location_opt(&self) -> Option<crate::snafu::Location> {
+        self.inner.location_opt()
     }

     fn as_any(&self) -> &dyn std::any::Any {
@@ -84,7 +86,7 @@ impl crate::ext::ErrorExt for BoxedError {
 // via `ErrorCompat::backtrace()`.
 impl crate::snafu::ErrorCompat for BoxedError {
     fn backtrace(&self) -> Option<&crate::snafu::Backtrace> {
-        self.inner.backtrace_opt()
+        None
     }
 }
|
||||
@@ -118,7 +120,7 @@ impl crate::ext::ErrorExt for PlainError {
|
||||
self.status_code
|
||||
}
|
||||
|
||||
fn backtrace_opt(&self) -> Option<&crate::snafu::Backtrace> {
|
||||
fn location_opt(&self) -> Option<crate::snafu::Location> {
|
||||
None
|
||||
}
|
||||
|
||||
@@ -126,62 +128,3 @@ impl crate::ext::ErrorExt for PlainError {
         self as _
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use std::error::Error;
-
-    use snafu::ErrorCompat;
-
-    use super::*;
-    use crate::format::DebugFormat;
-    use crate::mock::MockError;
-
-    #[test]
-    fn test_opaque_error_without_backtrace() {
-        let err = BoxedError::new(MockError::new(StatusCode::Internal));
-        assert!(err.backtrace_opt().is_none());
-        assert_eq!(StatusCode::Internal, err.status_code());
-        assert!(err.as_any().downcast_ref::<MockError>().is_some());
-        assert!(err.source().is_none());
-
-        assert!(ErrorCompat::backtrace(&err).is_none());
-    }
-
-    #[test]
-    fn test_opaque_error_with_backtrace() {
-        let err = BoxedError::new(MockError::with_backtrace(StatusCode::Internal));
-        assert!(err.backtrace_opt().is_some());
-        assert_eq!(StatusCode::Internal, err.status_code());
-        assert!(err.as_any().downcast_ref::<MockError>().is_some());
-        assert!(err.source().is_none());
-
-        assert!(ErrorCompat::backtrace(&err).is_some());
-
-        let msg = format!("{err:?}");
-        assert!(msg.contains("\nBacktrace:\n"));
-        let fmt_msg = format!("{:?}", DebugFormat::new(&err));
-        assert_eq!(msg, fmt_msg);
-
-        let msg = err.to_string();
-        msg.contains("Internal");
-    }
-
-    #[test]
-    fn test_opaque_error_with_source() {
-        let leaf_err = MockError::with_backtrace(StatusCode::Internal);
-        let internal_err = MockError::with_source(leaf_err);
-        let err = BoxedError::new(internal_err);
-
-        assert!(err.backtrace_opt().is_some());
-        assert_eq!(StatusCode::Internal, err.status_code());
-        assert!(err.as_any().downcast_ref::<MockError>().is_some());
-        assert!(err.source().is_some());
-
-        let msg = format!("{err:?}");
-        assert!(msg.contains("\nBacktrace:\n"));
-        assert!(msg.contains("Caused by"));
-
-        assert!(ErrorCompat::backtrace(&err).is_some());
-    }
-}
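For orientation, a minimal sketch (not part of the diff) of what a downstream error type looks like after this backtrace-to-location migration. It assumes the snafu version used here, where a field of type `snafu::Location` is captured implicitly at the error-construction site — the same pattern the variants above rely on:

```rust
use snafu::{Location, ResultExt, Snafu};

#[derive(Debug, Snafu)]
pub enum Error {
    #[snafu(display("Failed to open file: {}", path))]
    OpenFile {
        path: String,
        // Filled in implicitly by snafu; this is what the new
        // `ErrorExt::location_opt` surfaces instead of a Backtrace.
        location: Location,
        source: std::io::Error,
    },
}

impl Error {
    /// Mirrors the new `location_opt` convention from `ErrorExt`.
    pub fn location_opt(&self) -> Option<Location> {
        match self {
            Error::OpenFile { location, .. } => Some(*location),
        }
    }
}

fn open(path: &str) -> Result<std::fs::File, Error> {
    std::fs::File::open(path).context(OpenFileSnafu { path })
}

fn main() {
    if let Err(e) = open("/definitely/missing") {
        // Prints the capture site, e.g. Some(Location { file: "src/main.rs", .. }).
        println!("failed at {:?}", e.location_opt());
    }
}
```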
@@ -33,9 +33,9 @@ impl<'a, E: ErrorExt + ?Sized> fmt::Debug for DebugFormat<'a, E> {
             // Source error use debug format for more verbose info.
             write!(f, " Caused by: {source:?}")?;
         }
-        if let Some(backtrace) = self.0.backtrace_opt() {
+        if let Some(location) = self.0.location_opt() {
             // Add a newline to separate causes and backtrace.
-            write!(f, "\nBacktrace:\n{backtrace}")?;
+            write!(f, " at: {location}")?;
         }

         Ok(())
@@ -47,7 +47,7 @@ mod tests {
     use std::any::Any;

     use snafu::prelude::*;
-    use snafu::{Backtrace, GenerateImplicitData};
+    use snafu::{GenerateImplicitData, Location};

     use super::*;

@@ -56,7 +56,7 @@ mod tests {
     struct Leaf;

     impl ErrorExt for Leaf {
-        fn backtrace_opt(&self) -> Option<&Backtrace> {
+        fn location_opt(&self) -> Option<Location> {
             None
         }

@@ -66,14 +66,14 @@ mod tests {
     }

     #[derive(Debug, Snafu)]
-    #[snafu(display("This is a leaf with backtrace"))]
-    struct LeafWithBacktrace {
-        backtrace: Backtrace,
+    #[snafu(display("This is a leaf with location"))]
+    struct LeafWithLocation {
+        location: Location,
     }

-    impl ErrorExt for LeafWithBacktrace {
-        fn backtrace_opt(&self) -> Option<&Backtrace> {
-            Some(&self.backtrace)
+    impl ErrorExt for LeafWithLocation {
+        fn location_opt(&self) -> Option<Location> {
+            None
         }

         fn as_any(&self) -> &dyn Any {
@@ -86,12 +86,12 @@ mod tests {
     struct Internal {
         #[snafu(source)]
         source: Leaf,
-        backtrace: Backtrace,
+        location: Location,
     }

     impl ErrorExt for Internal {
-        fn backtrace_opt(&self) -> Option<&Backtrace> {
-            Some(&self.backtrace)
+        fn location_opt(&self) -> Option<Location> {
+            None
         }

         fn as_any(&self) -> &dyn Any {
@@ -106,19 +106,21 @@ mod tests {
         let msg = format!("{:?}", DebugFormat::new(&err));
         assert_eq!("This is a leaf error.", msg);

-        let err = LeafWithBacktrace {
-            backtrace: Backtrace::generate(),
+        let err = LeafWithLocation {
+            location: Location::generate(),
         };

+        // TODO(ruihang): display location here
         let msg = format!("{:?}", DebugFormat::new(&err));
-        assert!(msg.starts_with("This is a leaf with backtrace.\nBacktrace:\n"));
+        assert!(msg.starts_with("This is a leaf with location."));

         let err = Internal {
             source: Leaf,
-            backtrace: Backtrace::generate(),
+            location: Location::generate(),
         };

+        // TODO(ruihang): display location here
         let msg = format!("{:?}", DebugFormat::new(&err));
-        assert!(msg.contains("Internal error. Caused by: Leaf\nBacktrace:\n"));
+        assert!(msg.contains("Internal error. Caused by: Leaf"));
     }
 }

@@ -17,7 +17,7 @@
 use std::any::Any;
 use std::fmt;

-use snafu::GenerateImplicitData;
+use snafu::Location;

 use crate::prelude::*;

@@ -25,34 +25,19 @@ use crate::prelude::*;
 #[derive(Debug)]
 pub struct MockError {
     pub code: StatusCode,
-    backtrace: Option<Backtrace>,
     source: Option<Box<MockError>>,
 }

 impl MockError {
     /// Create a new [MockError] without backtrace.
     pub fn new(code: StatusCode) -> MockError {
-        MockError {
-            code,
-            backtrace: None,
-            source: None,
-        }
-    }
-
-    /// Create a new [MockError] with backtrace.
-    pub fn with_backtrace(code: StatusCode) -> MockError {
-        MockError {
-            code,
-            backtrace: Some(Backtrace::generate()),
-            source: None,
-        }
+        MockError { code, source: None }
     }

     /// Create a new [MockError] with source.
     pub fn with_source(source: MockError) -> MockError {
         MockError {
             code: source.code,
-            backtrace: None,
             source: Some(Box::new(source)),
         }
     }
@@ -75,39 +60,11 @@ impl ErrorExt for MockError {
         self.code
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        self.backtrace
-            .as_ref()
-            .or_else(|| self.source.as_ref().and_then(|err| err.backtrace_opt()))
+    fn location_opt(&self) -> Option<Location> {
+        None
     }

     fn as_any(&self) -> &dyn Any {
         self
     }
 }
-
-impl ErrorCompat for MockError {
-    fn backtrace(&self) -> Option<&Backtrace> {
-        self.backtrace_opt()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::error::Error;
-
-    use super::*;
-
-    #[test]
-    fn test_mock_error() {
-        let err = MockError::new(StatusCode::Unknown);
-        assert!(err.backtrace_opt().is_none());
-
-        let err = MockError::with_backtrace(StatusCode::Unknown);
-        assert!(err.backtrace_opt().is_some());
-
-        let root_err = MockError::with_source(err);
-        assert!(root_err.source().is_some());
-        assert!(root_err.backtrace_opt().is_some());
-    }
-}

@@ -36,6 +36,8 @@ macro_rules! ok {
 }

 pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
+    let mut result = TokenStream::new();
+
     // extract arg map
     let arg_pairs = parse_macro_input!(args as AttributeArgs);
     let arg_span = arg_pairs[0].span();
@@ -59,12 +61,17 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
     let arg_types = ok!(extract_input_types(inputs));

     // build the struct and its impl block
-    let struct_code = build_struct(
-        attrs,
-        vis,
-        ok!(get_ident(&arg_map, "name", arg_span)),
-        ok!(get_ident(&arg_map, "display_name", arg_span)),
-    );
+    // only do this when `display_name` is specified
+    if let Ok(display_name) = get_ident(&arg_map, "display_name", arg_span) {
+        let struct_code = build_struct(
+            attrs,
+            vis,
+            ok!(get_ident(&arg_map, "name", arg_span)),
+            display_name,
+        );
+        result.extend(struct_code);
+    }

     let calc_fn_code = build_calc_fn(
         ok!(get_ident(&arg_map, "name", arg_span)),
         arg_types,
@@ -77,8 +84,6 @@ pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenSt
     }
     .into();

-    let mut result = TokenStream::new();
-    result.extend(struct_code);
     result.extend(calc_fn_code);
     result.extend(input_fn_code);
     result
@@ -207,7 +212,7 @@ fn build_calc_fn(
         fn calc(input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
             assert_eq!(input.len(), #num_params);

-            #( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.data().clone().into())?; )*
+            #( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.to_data().into())?; )*

             // TODO(ruihang): add ensure!()

@@ -11,11 +11,17 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+use std::sync::Arc;
+
+mod to_unixtime;

+use to_unixtime::ToUnixtimeFunction;
+
 use crate::scalars::function_registry::FunctionRegistry;

 pub(crate) struct TimestampFunction;

 impl TimestampFunction {
-    pub fn register(_registry: &FunctionRegistry) {}
+    pub fn register(registry: &FunctionRegistry) {
+        registry.register(Arc::new(ToUnixtimeFunction::default()));
+    }
 }

src/common/function/src/scalars/timestamp/to_unixtime.rs (new file, 148 lines)
@@ -0,0 +1,148 @@
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::{self, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::StringType;
|
||||
use datatypes::vectors::{Int64Vector, StringVector, Vector, VectorRef};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct ToUnixtimeFunction;
|
||||
|
||||
const NAME: &str = "to_unixtime";
|
||||
|
||||
fn convert_to_seconds(arg: &str) -> Option<i64> {
|
||||
match Timestamp::from_str(arg) {
|
||||
Ok(ts) => {
|
||||
let sec_mul = (TimeUnit::Second.factor() / ts.unit().factor()) as i64;
|
||||
Some(ts.value().div_euclid(sec_mul))
|
||||
}
|
||||
Err(_err) => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for ToUnixtimeFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::timestamp_second_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::String(StringType)],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
match columns[0].data_type() {
|
||||
ConcreteDataType::String(_) => {
|
||||
let array = columns[0].to_arrow_array();
|
||||
let vector = StringVector::try_from_arrow_array(&array).unwrap();
|
||||
Ok(Arc::new(Int64Vector::from(
|
||||
(0..vector.len())
|
||||
.map(|i| convert_to_seconds(&vector.get(i).to_string()))
|
||||
.collect::<Vec<_>>(),
|
||||
)))
|
||||
}
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ToUnixtimeFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "TO_UNIXTIME")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::StringType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::StringVector;
|
||||
|
||||
use super::{ToUnixtimeFunction, *};
|
||||
use crate::scalars::Function;
|
||||
|
||||
#[test]
|
||||
fn test_to_unixtime() {
|
||||
let f = ToUnixtimeFunction::default();
|
||||
assert_eq!("to_unixtime", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::String(StringType)]
|
||||
));
|
||||
|
||||
let times = vec![
|
||||
Some("2023-03-01T06:35:02Z"),
|
||||
None,
|
||||
Some("2022-06-30T23:59:60Z"),
|
||||
Some("invalid_time_stamp"),
|
||||
];
|
||||
let results = vec![Some(1677652502), None, Some(1656633600), None];
|
||||
let args: Vec<VectorRef> = vec![Arc::new(StringVector::from(times.clone()))];
|
||||
let vector = f.eval(FunctionContext::default(), &args).unwrap();
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, _t) in times.iter().enumerate() {
|
||||
let v = vector.get(i);
|
||||
if i == 1 || i == 3 {
|
||||
assert_eq!(Value::Null, v);
|
||||
continue;
|
||||
}
|
||||
match v {
|
||||
Value::Int64(ts) => {
|
||||
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
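For reference, the unit test above pins down the conversion semantics. A condensed sketch of what `convert_to_seconds` yields on the test inputs (values taken straight from the test; the helper itself is private to this module):

    // Second-precision input: sec_mul = factor(Second) / factor(Second) = 1,
    // so the parsed value is returned unchanged as Unix seconds.
    assert_eq!(convert_to_seconds("2023-03-01T06:35:02Z"), Some(1677652502));
    // Unparsable input maps to None, which the output vector surfaces as NULL.
    assert_eq!(convert_to_seconds("invalid_time_stamp"), None);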
@@ -178,6 +178,7 @@ pub fn create_expr_to_request(
         primary_key_indices,
         create_if_not_exists: expr.create_if_not_exists,
         table_options,
+        engine: expr.engine,
     })
 }
@@ -17,7 +17,7 @@ use std::any::Any;
 use api::DecodeError;
 use common_error::ext::ErrorExt;
 use common_error::prelude::{Snafu, StatusCode};
-use snafu::{Backtrace, ErrorCompat};
+use snafu::Location;

 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
@@ -32,7 +32,7 @@ pub enum Error {
     DecodeInsert { source: DecodeError },

     #[snafu(display("Illegal insert data"))]
-    IllegalInsertData { backtrace: Backtrace },
+    IllegalInsertData { location: Location },

     #[snafu(display("Column datatype error, source: {}", source))]
     ColumnDataType {
@@ -48,17 +48,14 @@ pub enum Error {
     DuplicatedTimestampColumn {
         exists: String,
         duplicated: String,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Missing timestamp column, msg: {}", msg))]
-    MissingTimestampColumn { msg: String, backtrace: Backtrace },
+    MissingTimestampColumn { msg: String, location: Location },

     #[snafu(display("Invalid column proto: {}", err_msg))]
-    InvalidColumnProto {
-        err_msg: String,
-        backtrace: Backtrace,
-    },
+    InvalidColumnProto { err_msg: String, location: Location },
     #[snafu(display("Failed to create vector, source: {}", source))]
     CreateVector {
         #[snafu(backtrace)]
@@ -66,7 +63,7 @@ pub enum Error {
     },

     #[snafu(display("Missing required field in protobuf, field: {}", field))]
-    MissingField { field: String, backtrace: Backtrace },
+    MissingField { field: String, location: Location },

     #[snafu(display("Invalid column default constraint, source: {}", source))]
     ColumnDefaultConstraint {
@@ -113,9 +110,6 @@ impl ErrorExt for Error {
             Error::UnrecognizedTableOption { .. } => StatusCode::InvalidArguments,
         }
     }
-
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }

     fn as_any(&self) -> &dyn Any {
         self
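A note on the pattern repeated throughout these error enums: snafu's heavyweight `Backtrace` capture is swapped for a lightweight `Location` (file, line, and column of the error-construction site), and the hand-written `backtrace_opt` methods are dropped. A minimal sketch of the idiom, assuming snafu 0.7's implicit field generation:

    use snafu::{Location, Snafu};

    #[derive(Debug, Snafu)]
    enum Error {
        // A field named `location` of type `Location` is filled in
        // automatically when the error is constructed; unlike a
        // `Backtrace`, no stack capture or unwinding is involved.
        #[snafu(display("Missing required field in protobuf, field: {}", field))]
        MissingField { field: String, location: Location },
    }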
@@ -195,6 +195,7 @@ pub fn build_create_expr_from_insertion(
     table_id: Option<TableId>,
     table_name: &str,
     columns: &[Column],
+    engine: &str,
 ) -> Result<CreateTableExpr> {
     let mut new_columns: HashSet<String> = HashSet::default();
     let mut column_defs = Vec::default();
@@ -256,6 +257,7 @@ pub fn build_create_expr_from_insertion(
         table_options: Default::default(),
         table_id: table_id.map(|id| api::v1::TableId { id }),
         region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
+        engine: engine.to_string(),
     };

     Ok(expr)
@@ -455,6 +457,7 @@ mod tests {
     use api::v1::column::{self, SemanticType, Values};
     use api::v1::{Column, ColumnDataType};
     use common_base::BitVec;
+    use common_catalog::consts::MITO_ENGINE;
     use common_query::physical_plan::PhysicalPlanRef;
     use common_query::prelude::Expr;
     use common_time::timestamp::Timestamp;
@@ -493,13 +496,22 @@ mod tests {
         let table_id = Some(10);
         let table_name = "test_metric";

-        assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());
+        assert!(
+            build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE)
+                .is_err()
+        );

         let insert_batch = mock_insert_batch();

-        let create_expr =
-            build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
-                .unwrap();
+        let create_expr = build_create_expr_from_insertion(
+            "",
+            "",
+            table_id,
+            table_name,
+            &insert_batch.0,
+            MITO_ENGINE,
+        )
+        .unwrap();

         assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
         assert_eq!(table_name, create_expr.table_name);
@@ -16,7 +16,7 @@ use std::any::Any;
 use std::io;

 use common_error::prelude::{ErrorExt, StatusCode};
-use snafu::{Backtrace, ErrorCompat, Snafu};
+use snafu::{Location, Snafu};

 pub type Result<T> = std::result::Result<T, Error>;

@@ -29,11 +29,11 @@ pub enum Error {
     #[snafu(display("Invalid config file path, {}", source))]
     InvalidConfigFilePath {
         source: io::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Missing required field in protobuf, field: {}", field))]
-    MissingField { field: String, backtrace: Backtrace },
+    MissingField { field: String, location: Location },

     #[snafu(display(
         "Write type mismatch, column name: {}, expected: {}, actual: {}",
@@ -45,13 +45,13 @@ pub enum Error {
         column_name: String,
         expected: String,
         actual: String,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to create gRPC channel, source: {}", source))]
     CreateChannel {
         source: tonic::transport::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to create RecordBatch, source: {}", source))]
@@ -61,7 +61,7 @@ pub enum Error {
     },

     #[snafu(display("Failed to convert Arrow type: {}", from))]
-    Conversion { from: String, backtrace: Backtrace },
+    Conversion { from: String, location: Location },

     #[snafu(display("Column datatype error, source: {}", source))]
     ColumnDataType {
@@ -72,14 +72,11 @@ pub enum Error {
     #[snafu(display("Failed to decode FlightData, source: {}", source))]
     DecodeFlightData {
         source: api::DecodeError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Invalid FlightData, reason: {}", reason))]
-    InvalidFlightData {
-        reason: String,
-        backtrace: Backtrace,
-    },
+    InvalidFlightData { reason: String, location: Location },

     #[snafu(display("Failed to convert Arrow Schema, source: {}", source))]
     ConvertArrowSchema {
@@ -107,65 +104,7 @@ impl ErrorExt for Error {
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use snafu::{OptionExt, ResultExt};
-
-    use super::*;
-
-    type StdResult<E> = std::result::Result<(), E>;
-
-    fn throw_none_option() -> Option<String> {
-        None
-    }
-
-    #[test]
-    fn test_missing_field_error() {
-        let e = throw_none_option()
-            .context(MissingFieldSnafu { field: "test" })
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
-    }
-
-    #[test]
-    fn test_type_mismatch_error() {
-        let e = throw_none_option()
-            .context(TypeMismatchSnafu {
-                column_name: "",
-                expected: "",
-                actual: "",
-            })
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::InvalidArguments);
-    }
-
-    #[test]
-    fn test_create_channel_error() {
-        fn throw_tonic_error() -> StdResult<tonic::transport::Error> {
-            tonic::transport::Endpoint::new("http//http").map(|_| ())
-        }
-
-        let e = throw_tonic_error()
-            .context(CreateChannelSnafu)
-            .err()
-            .unwrap();
-
-        assert!(e.backtrace_opt().is_some());
-        assert_eq!(e.status_code(), StatusCode::Internal);
-    }
-}
@@ -16,7 +16,7 @@ use std::any::Any;
 use std::path::PathBuf;

 use common_error::prelude::{ErrorExt, StatusCode};
-use snafu::{Backtrace, Snafu};
+use snafu::{Location, Snafu};

 pub type Result<T> = std::result::Result<T, Error>;

@@ -30,7 +30,7 @@ pub enum Error {
     ProfilingNotEnabled,

     #[snafu(display("Failed to build temp file from given path: {:?}", path))]
-    BuildTempPath { path: PathBuf, backtrace: Backtrace },
+    BuildTempPath { path: PathBuf, location: Location },

     #[snafu(display("Failed to open temp file: {}", path))]
     OpenTempFile {
@@ -56,10 +56,6 @@ impl ErrorExt for Error {
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        snafu::ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -6,6 +6,7 @@ license.workspace = true

 [dependencies]
 async-trait.workspace = true
+async-stream.workspace = true
 common-error = { path = "../error" }
 common-runtime = { path = "../runtime" }
 common-telemetry = { path = "../telemetry" }
@@ -13,9 +13,11 @@
 // limitations under the License.

 use std::any::Any;
+use std::string::FromUtf8Error;
 use std::sync::Arc;

 use common_error::prelude::*;
+use snafu::Location;

 use crate::procedure::ProcedureId;

@@ -33,24 +35,25 @@ pub enum Error {
     },

     #[snafu(display("Loader {} is already registered", name))]
-    LoaderConflict { name: String, backtrace: Backtrace },
+    LoaderConflict { name: String, location: Location },

     #[snafu(display("Failed to serialize to json, source: {}", source))]
     ToJson {
         source: serde_json::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Procedure {} already exists", procedure_id))]
     DuplicateProcedure {
         procedure_id: ProcedureId,
-        backtrace: Backtrace,
+        location: Location,
     },

-    #[snafu(display("Failed to put {}, source: {}", key, source))]
+    #[snafu(display("Failed to put state, key: '{key}', source: {source}"))]
     PutState {
         key: String,
-        source: object_store::Error,
+        #[snafu(backtrace)]
+        source: BoxedError,
     },

     #[snafu(display("Failed to delete {}, source: {}", key, source))]
@@ -59,10 +62,18 @@ pub enum Error {
         source: object_store::Error,
     },

-    #[snafu(display("Failed to list {}, source: {}", path, source))]
+    #[snafu(display("Failed to delete keys: '{keys}', source: {source}"))]
+    DeleteStates {
+        keys: String,
+        #[snafu(backtrace)]
+        source: BoxedError,
+    },
+
+    #[snafu(display("Failed to list state, path: '{path}', source: {source}"))]
     ListState {
         path: String,
-        source: object_store::Error,
+        #[snafu(backtrace)]
+        source: BoxedError,
     },

     #[snafu(display("Failed to read {}, source: {}", key, source))]
@@ -74,7 +85,7 @@ pub enum Error {
     #[snafu(display("Failed to deserialize from json, source: {}", source))]
     FromJson {
         source: serde_json::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Procedure exec failed, source: {}", source))]
@@ -89,13 +100,13 @@ pub enum Error {
     #[snafu(display("Failed to wait watcher, source: {}", source))]
     WaitWatcher {
         source: tokio::sync::watch::error::RecvError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to execute procedure, source: {}", source))]
     ProcedureExec {
         source: Arc<Error>,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display(
@@ -107,6 +118,9 @@ pub enum Error {
         source: Arc<Error>,
         procedure_id: ProcedureId,
     },
+
+    #[snafu(display("Corrupted data, error: {source}"))]
+    CorruptedData { source: FromUtf8Error },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -114,11 +128,13 @@ pub type Result<T> = std::result::Result<T, Error>;
 impl ErrorExt for Error {
     fn status_code(&self) -> StatusCode {
         match self {
-            Error::External { source } => source.status_code(),
+            Error::External { source }
+            | Error::PutState { source, .. }
+            | Error::DeleteStates { source, .. }
+            | Error::ListState { source, .. } => source.status_code(),

             Error::ToJson { .. }
-            | Error::PutState { .. }
             | Error::DeleteState { .. }
-            | Error::ListState { .. }
             | Error::ReadState { .. }
             | Error::FromJson { .. }
             | Error::RetryTimesExceeded { .. }
@@ -127,15 +143,11 @@ impl ErrorExt for Error {
             Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
                 StatusCode::InvalidArguments
             }
-            Error::ProcedurePanic { .. } => StatusCode::Unexpected,
+            Error::ProcedurePanic { .. } | Error::CorruptedData { .. } => StatusCode::Unexpected,
             Error::ProcedureExec { source, .. } => source.status_code(),
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -17,7 +17,7 @@
 pub mod error;
 pub mod local;
 mod procedure;
-mod store;
+pub mod store;
 pub mod watcher;

 pub use crate::error::{Error, Result};
@@ -22,7 +22,6 @@ use std::time::Duration;
 use async_trait::async_trait;
 use backon::ExponentialBuilder;
 use common_telemetry::logging;
-use object_store::ObjectStore;
 use snafu::ensure;
 use tokio::sync::watch::{self, Receiver, Sender};
 use tokio::sync::Notify;
@@ -31,7 +30,7 @@ use crate::error::{DuplicateProcedureSnafu, LoaderConflictSnafu, Result};
 use crate::local::lock::LockMap;
 use crate::local::runner::Runner;
 use crate::procedure::BoxedProcedureLoader;
-use crate::store::{ObjectStateStore, ProcedureMessage, ProcedureStore, StateStoreRef};
+use crate::store::{ProcedureMessage, ProcedureStore, StateStoreRef};
 use crate::{
     BoxedProcedure, ContextProvider, LockKey, ProcedureId, ProcedureManager, ProcedureState,
     ProcedureWithId, Watcher,
@@ -291,12 +290,19 @@ impl ManagerContext {
 /// Config for [LocalManager].
 #[derive(Debug)]
 pub struct ManagerConfig {
-    /// Object store
-    pub object_store: ObjectStore,
     pub max_retry_times: usize,
     pub retry_delay: Duration,
 }
+
+impl Default for ManagerConfig {
+    fn default() -> Self {
+        Self {
+            max_retry_times: 3,
+            retry_delay: Duration::from_millis(500),
+        }
+    }
+}

 /// A [ProcedureManager] that maintains procedure states locally.
 pub struct LocalManager {
     manager_ctx: Arc<ManagerContext>,
@@ -307,10 +313,10 @@ pub struct LocalManager {

 impl LocalManager {
     /// Create a new [LocalManager] with specific `config`.
-    pub fn new(config: ManagerConfig) -> LocalManager {
+    pub fn new(config: ManagerConfig, state_store: StateStoreRef) -> LocalManager {
         LocalManager {
             manager_ctx: Arc::new(ManagerContext::new()),
-            state_store: Arc::new(ObjectStateStore::new(config.object_store)),
+            state_store,
             max_retry_times: config.max_retry_times,
             retry_delay: config.retry_delay,
         }
@@ -423,7 +429,7 @@ impl ProcedureManager for LocalManager {
 mod test_util {
     use common_test_util::temp_dir::TempDir;
     use object_store::services::Fs as Builder;
-    use object_store::ObjectStoreBuilder;
+    use object_store::ObjectStore;

     use super::*;

@@ -433,8 +439,9 @@ mod test_util {

     pub(crate) fn new_object_store(dir: &TempDir) -> ObjectStore {
         let store_dir = dir.path().to_str().unwrap();
-        let accessor = Builder::default().root(store_dir).build().unwrap();
-        ObjectStore::new(accessor).finish()
+        let mut builder = Builder::default();
+        builder.root(store_dir);
+        ObjectStore::new(builder).unwrap().finish()
     }
 }

@@ -446,6 +453,7 @@ mod tests {

     use super::*;
     use crate::error::Error;
+    use crate::store::ObjectStateStore;
     use crate::{Context, Procedure, Status};

     #[test]
@@ -554,11 +562,11 @@ mod tests {
     fn test_register_loader() {
         let dir = create_temp_dir("register");
         let config = ManagerConfig {
-            object_store: test_util::new_object_store(&dir),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
         };
-        let manager = LocalManager::new(config);
+        let state_store = Arc::new(ObjectStateStore::new(test_util::new_object_store(&dir)));
+        let manager = LocalManager::new(config, state_store);

         manager
             .register_loader("ProcedureToLoad", ProcedureToLoad::loader())
@@ -575,11 +583,11 @@ mod tests {
         let dir = create_temp_dir("recover");
         let object_store = test_util::new_object_store(&dir);
         let config = ManagerConfig {
-            object_store: object_store.clone(),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
         };
-        let manager = LocalManager::new(config);
+        let state_store = Arc::new(ObjectStateStore::new(object_store.clone()));
+        let manager = LocalManager::new(config, state_store);

         manager
             .register_loader("ProcedureToLoad", ProcedureToLoad::loader())
@@ -621,11 +629,11 @@ mod tests {
     async fn test_submit_procedure() {
         let dir = create_temp_dir("submit");
         let config = ManagerConfig {
-            object_store: test_util::new_object_store(&dir),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
         };
-        let manager = LocalManager::new(config);
+        let state_store = Arc::new(ObjectStateStore::new(test_util::new_object_store(&dir)));
+        let manager = LocalManager::new(config, state_store);

         let procedure_id = ProcedureId::random();
         assert!(manager
@@ -669,11 +677,11 @@ mod tests {
     async fn test_state_changed_on_err() {
         let dir = create_temp_dir("on_err");
         let config = ManagerConfig {
-            object_store: test_util::new_object_store(&dir),
             max_retry_times: 3,
             retry_delay: Duration::from_millis(500),
         };
-        let manager = LocalManager::new(config);
+        let state_store = Arc::new(ObjectStateStore::new(test_util::new_object_store(&dir)));
+        let manager = LocalManager::new(config, state_store);

         #[derive(Debug)]
         struct MockProcedure {
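Taken together, the LocalManager changes invert a dependency: the object store no longer lives in ManagerConfig, and callers inject any StateStore implementation instead. A sketch of the new wiring, mirroring the tests above (assume `object_store` is an ObjectStore built as in `test_util::new_object_store`):

    // Types are those shown in this diff (ManagerConfig, ObjectStateStore,
    // LocalManager); treat this as illustrative rather than verified.
    let config = ManagerConfig::default(); // max_retry_times: 3, retry_delay: 500ms
    let state_store = Arc::new(ObjectStateStore::new(object_store));
    let manager = LocalManager::new(config, state_store);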
@@ -473,8 +473,7 @@ mod tests {

     async fn check_files(object_store: &ObjectStore, procedure_id: ProcedureId, files: &[&str]) {
         let dir = format!("{procedure_id}/");
-        let object = object_store.object(&dir);
-        let lister = object.list().await.unwrap();
+        let lister = object_store.list(&dir).await.unwrap();
         let mut files_in_dir: Vec<_> = lister
             .map_ok(|de| de.name().to_string())
             .try_collect()
@@ -26,7 +26,7 @@ use crate::error::{Result, ToJsonSnafu};
 pub(crate) use crate::store::state_store::{ObjectStateStore, StateStoreRef};
 use crate::{BoxedProcedure, ProcedureId};

-mod state_store;
+pub mod state_store;

 /// Serialized data of a procedure.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -248,15 +248,15 @@ mod tests {
     use async_trait::async_trait;
     use common_test_util::temp_dir::{create_temp_dir, TempDir};
     use object_store::services::Fs as Builder;
-    use object_store::ObjectStoreBuilder;

     use super::*;
     use crate::{Context, LockKey, Procedure, Status};

     fn procedure_store_for_test(dir: &TempDir) -> ProcedureStore {
         let store_dir = dir.path().to_str().unwrap();
-        let accessor = Builder::default().root(store_dir).build().unwrap();
-        let object_store = ObjectStore::new(accessor).finish();
+        let mut builder = Builder::default();
+        builder.root(store_dir);
+        let object_store = ObjectStore::new(builder).unwrap().finish();

         ProcedureStore::from(object_store)
     }
@@ -15,22 +15,25 @@
 use std::pin::Pin;
 use std::sync::Arc;

+use async_stream::try_stream;
 use async_trait::async_trait;
-use futures::{Stream, TryStreamExt};
-use object_store::{ObjectMode, ObjectStore};
+use common_error::ext::PlainError;
+use common_error::prelude::{BoxedError, StatusCode};
+use futures::{Stream, StreamExt};
+use object_store::{EntryMode, Metakey, ObjectStore};
 use snafu::ResultExt;

-use crate::error::{DeleteStateSnafu, Error, PutStateSnafu, Result};
+use crate::error::{DeleteStateSnafu, ListStateSnafu, PutStateSnafu, Result};

 /// Key value from state store.
-type KeyValue = (String, Vec<u8>);
+pub type KeyValue = (String, Vec<u8>);

 /// Stream that yields [KeyValue].
-type KeyValueStream = Pin<Box<dyn Stream<Item = Result<KeyValue>> + Send>>;
+pub type KeyValueStream = Pin<Box<dyn Stream<Item = Result<KeyValue>> + Send>>;

 /// Storage layer for persisting procedure's state.
 #[async_trait]
-pub(crate) trait StateStore: Send + Sync {
+pub trait StateStore: Send + Sync {
     /// Puts `key` and `value` into the store.
     async fn put(&self, key: &str, value: Vec<u8>) -> Result<()>;

@@ -50,13 +53,13 @@ pub(crate) type StateStoreRef = Arc<dyn StateStore>;

 /// [StateStore] based on [ObjectStore].
 #[derive(Debug)]
-pub(crate) struct ObjectStateStore {
+pub struct ObjectStateStore {
     store: ObjectStore,
 }

 impl ObjectStateStore {
     /// Returns a new [ObjectStateStore] with specific `store`.
-    pub(crate) fn new(store: ObjectStore) -> ObjectStateStore {
+    pub fn new(store: ObjectStore) -> ObjectStateStore {
         ObjectStateStore { store }
     }
 }
@@ -64,49 +67,83 @@ impl ObjectStateStore {
 #[async_trait]
 impl StateStore for ObjectStateStore {
     async fn put(&self, key: &str, value: Vec<u8>) -> Result<()> {
-        let object = self.store.object(key);
-        object.write(value).await.context(PutStateSnafu { key })
+        self.store
+            .write(key, value)
+            .await
+            .map_err(|e| {
+                BoxedError::new(PlainError::new(
+                    e.to_string(),
+                    StatusCode::StorageUnavailable,
+                ))
+            })
+            .context(PutStateSnafu { key })
     }

     async fn walk_top_down(&self, path: &str) -> Result<KeyValueStream> {
         let path_string = path.to_string();

-        let lister = self
+        let mut lister = self
             .store
-            .object(path)
-            .scan()
+            .scan(path)
             .await
-            .map_err(|e| Error::ListState {
-                path: path_string.clone(),
-                source: e,
+            .map_err(|e| {
+                BoxedError::new(PlainError::new(
+                    e.to_string(),
+                    StatusCode::StorageUnavailable,
+                ))
+            })
+            .with_context(|_| ListStateSnafu {
+                path: path_string.clone(),
             })?;

-        let stream = lister
-            .try_filter_map(|entry| async move {
-                let key = entry.path();
-                let key_value = match entry.mode().await? {
-                    ObjectMode::FILE => {
-                        let value = entry.read().await?;
-
-                        Some((key.to_string(), value))
-                    }
-                    ObjectMode::DIR | ObjectMode::Unknown => None,
-                };
-
-                Ok(key_value)
-            })
-            .map_err(move |e| Error::ListState {
-                path: path_string.clone(),
-                source: e,
-            });
+        let store = self.store.clone();
+
+        let stream = try_stream!({
+            while let Some(res) = lister.next().await {
+                let entry = res
+                    .map_err(|e| {
+                        BoxedError::new(PlainError::new(
+                            e.to_string(),
+                            StatusCode::StorageUnavailable,
+                        ))
+                    })
+                    .context(ListStateSnafu { path: &path_string })?;
+                let key = entry.path();
+                let metadata = store
+                    .metadata(&entry, Metakey::Mode)
+                    .await
+                    .map_err(|e| {
+                        BoxedError::new(PlainError::new(
+                            e.to_string(),
+                            StatusCode::StorageUnavailable,
+                        ))
+                    })
+                    .context(ListStateSnafu { path: key })?;
+                if let EntryMode::FILE = metadata.mode() {
+                    let value = store
+                        .read(key)
+                        .await
+                        .map_err(|e| {
+                            BoxedError::new(PlainError::new(
+                                e.to_string(),
+                                StatusCode::StorageUnavailable,
+                            ))
+                        })
+                        .context(ListStateSnafu { path: key })?;
+                    yield (key.to_string(), value);
+                }
+            }
+        });

         Ok(Box::pin(stream))
     }

     async fn delete(&self, keys: &[String]) -> Result<()> {
         for key in keys {
-            let object = self.store.object(key);
-            object.delete().await.context(DeleteStateSnafu { key })?;
+            self.store
+                .delete(key)
+                .await
+                .context(DeleteStateSnafu { key })?;
         }

         Ok(())
@@ -116,8 +153,8 @@ impl StateStore for ObjectStateStore {
 #[cfg(test)]
 mod tests {
     use common_test_util::temp_dir::create_temp_dir;
+    use futures_util::TryStreamExt;
     use object_store::services::Fs as Builder;
-    use object_store::ObjectStoreBuilder;

     use super::*;

@@ -125,8 +162,10 @@ mod tests {
     async fn test_object_state_store() {
         let dir = create_temp_dir("state_store");
         let store_dir = dir.path().to_str().unwrap();
-        let accessor = Builder::default().root(store_dir).build().unwrap();
-        let object_store = ObjectStore::new(accessor).finish();
+        let mut builder = Builder::default();
+        builder.root(store_dir);
+
+        let object_store = ObjectStore::new(builder).unwrap().finish();
         let state_store = ObjectStateStore::new(object_store);

         let data: Vec<_> = state_store
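The walk_top_down rewrite trades futures combinators for async-stream's `try_stream!`, which allows ordinary control flow (`while`, `if`, `?`) inside a generator that still produces a `Stream` of `Result`s. A self-contained sketch of the macro, assuming async-stream 0.3 and the futures crate:

    use async_stream::try_stream;
    use futures::Stream;

    // Yields Ok(0), Ok(1), ..., Ok(n - 1); a failing `?` inside the body
    // would instead terminate the stream with an Err item.
    fn counter(n: u32) -> impl Stream<Item = Result<u32, std::io::Error>> {
        try_stream! {
            for i in 0..n {
                yield i;
            }
        }
    }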
@@ -22,6 +22,7 @@ use datatypes::arrow;
 use datatypes::arrow::datatypes::DataType as ArrowDatatype;
 use datatypes::error::Error as DataTypeError;
 use datatypes::prelude::ConcreteDataType;
+use snafu::Location;
 use statrs::StatsError;

 #[derive(Debug, Snafu)]
@@ -31,7 +32,7 @@ pub enum Error {
     PyUdf {
         // TODO(discord9): find a way that prevent circle depend(query<-script<-query) and can use script's error type
         msg: String,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display(
@@ -46,20 +47,20 @@ pub enum Error {
     #[snafu(display("Fail to execute function, source: {}", source))]
     ExecuteFunction {
         source: DataFusionError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Unsupported input datatypes {:?} in function {}", datatypes, function))]
     UnsupportedInputDataType {
         function: String,
         datatypes: Vec<ConcreteDataType>,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Fail to generate function, source: {}", source))]
     GenerateFunction {
         source: StatsError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Fail to cast scalar value into vector: {}", source))]
@@ -88,10 +89,7 @@ pub enum Error {
     DowncastVector { err_msg: String },

     #[snafu(display("Bad accumulator implementation: {}", err_msg))]
-    BadAccumulatorImpl {
-        err_msg: String,
-        backtrace: Backtrace,
-    },
+    BadAccumulatorImpl { err_msg: String, location: Location },

     #[snafu(display("Invalid input type: {}", err_msg))]
     InvalidInputType {
@@ -103,24 +101,24 @@ pub enum Error {
     #[snafu(display(
         "Illegal input_types status, check if DataFusion has changed its UDAF execution logic"
     ))]
-    InvalidInputState { backtrace: Backtrace },
+    InvalidInputState { location: Location },

     #[snafu(display("unexpected: not constant column"))]
-    InvalidInputCol { backtrace: Backtrace },
+    InvalidInputCol { location: Location },

     #[snafu(display("Not expected to run ExecutionPlan more than once"))]
-    ExecuteRepeatedly { backtrace: Backtrace },
+    ExecuteRepeatedly { location: Location },

     #[snafu(display("General DataFusion error, source: {}", source))]
     GeneralDataFusion {
         source: DataFusionError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to execute DataFusion ExecutionPlan, source: {}", source))]
     DataFusionExecutionPlan {
         source: DataFusionError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display(
@@ -148,7 +146,7 @@ pub enum Error {
     TypeCast {
         source: ArrowError,
         typ: arrow::datatypes::DataType,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display(
@@ -157,7 +155,7 @@ pub enum Error {
     ))]
     ArrowCompute {
         source: ArrowError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Query engine fail to cast value: {}", source))]
@@ -173,10 +171,7 @@ pub enum Error {
     },

     #[snafu(display("Invalid function args: {}", err_msg))]
-    InvalidFuncArgs {
-        err_msg: String,
-        backtrace: Backtrace,
-    },
+    InvalidFuncArgs { err_msg: String, location: Location },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -216,10 +211,6 @@ impl ErrorExt for Error {
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -236,83 +227,3 @@ impl From<BoxedError> for Error {
         Error::ExecutePhysicalPlan { source }
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use snafu::GenerateImplicitData;
-
-    use super::*;
-
-    fn throw_df_error() -> std::result::Result<(), DataFusionError> {
-        Err(DataFusionError::NotImplemented("test".to_string()))
-    }
-
-    fn assert_error(err: &Error, code: StatusCode) {
-        let inner_err = err.as_any().downcast_ref::<Error>().unwrap();
-        assert_eq!(code, inner_err.status_code());
-        assert!(inner_err.backtrace_opt().is_some());
-    }
-
-    #[test]
-    fn test_datafusion_as_source() {
-        let err = throw_df_error()
-            .context(ExecuteFunctionSnafu)
-            .err()
-            .unwrap();
-        assert_error(&err, StatusCode::EngineExecuteQuery);
-
-        let err: Error = throw_df_error()
-            .context(GeneralDataFusionSnafu)
-            .err()
-            .unwrap();
-        assert_error(&err, StatusCode::Unexpected);
-
-        let err = throw_df_error()
-            .context(DataFusionExecutionPlanSnafu)
-            .err()
-            .unwrap();
-        assert_error(&err, StatusCode::Unexpected);
-    }
-
-    #[test]
-    fn test_execute_repeatedly_error() {
-        let error = None::<i32>.context(ExecuteRepeatedlySnafu).err().unwrap();
-        assert_eq!(error.status_code(), StatusCode::Unexpected);
-        assert!(error.backtrace_opt().is_some());
-    }
-
-    #[test]
-    fn test_convert_df_recordbatch_stream_error() {
-        let result: std::result::Result<i32, common_recordbatch::error::Error> =
-            Err(common_recordbatch::error::Error::PollStream {
-                source: DataFusionError::Internal("blabla".to_string()),
-                backtrace: Backtrace::generate(),
-            });
-        let error = result
-            .context(ConvertDfRecordBatchStreamSnafu)
-            .err()
-            .unwrap();
-        assert_eq!(error.status_code(), StatusCode::Internal);
-        assert!(error.backtrace_opt().is_some());
-    }
-
-    fn raise_datatype_error() -> std::result::Result<(), DataTypeError> {
-        Err(DataTypeError::Conversion {
-            from: "test".to_string(),
-            backtrace: Backtrace::generate(),
-        })
-    }
-
-    #[test]
-    fn test_into_vector_error() {
-        let err = raise_datatype_error()
-            .context(IntoVectorSnafu {
-                data_type: ArrowDatatype::Int32,
-            })
-            .err()
-            .unwrap();
-        assert!(err.backtrace_opt().is_some());
-        let datatype_err = raise_datatype_error().err().unwrap();
-        assert_eq!(datatype_err.status_code(), err.status_code());
-    }
-}
@@ -17,6 +17,8 @@ use std::any::Any;

 use common_error::ext::BoxedError;
 use common_error::prelude::*;
+use datatypes::prelude::ConcreteDataType;
+use snafu::Location;

 pub type Result<T> = std::result::Result<T, Error>;

@@ -26,7 +28,7 @@ pub enum Error {
     #[snafu(display("Fail to create datafusion record batch, source: {}", source))]
     NewDfRecordBatch {
         source: datatypes::arrow::error::ArrowError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Data types error, source: {}", source))]
@@ -42,33 +44,50 @@ pub enum Error {
     },

     #[snafu(display("Failed to create RecordBatches, reason: {}", reason))]
-    CreateRecordBatches {
-        reason: String,
-        backtrace: Backtrace,
-    },
+    CreateRecordBatches { reason: String, location: Location },

     #[snafu(display("Failed to convert Arrow schema, source: {}", source))]
     SchemaConversion {
         source: datatypes::error::Error,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to poll stream, source: {}", source))]
     PollStream {
         source: datafusion::error::DataFusionError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Fail to format record batch, source: {}", source))]
     Format {
         source: datatypes::arrow::error::ArrowError,
-        backtrace: Backtrace,
+        location: Location,
     },

     #[snafu(display("Failed to init Recordbatch stream, source: {}", source))]
     InitRecordbatchStream {
         source: datafusion_common::DataFusionError,
-        backtrace: Backtrace,
+        location: Location,
     },
+
+    #[snafu(display("Column {} not exists in table {}", column_name, table_name))]
+    ColumnNotExists {
+        column_name: String,
+        table_name: String,
+        location: Location,
+    },
+
+    #[snafu(display(
+        "Failed to cast vector of type '{:?}' to type '{:?}', source: {}",
+        from_type,
+        to_type,
+        source
+    ))]
+    CastVector {
+        from_type: ConcreteDataType,
+        to_type: ConcreteDataType,
+        #[snafu(backtrace)]
+        source: datatypes::error::Error,
+    },
 }

@@ -81,18 +100,17 @@ impl ErrorExt for Error {
             | Error::CreateRecordBatches { .. }
             | Error::PollStream { .. }
             | Error::Format { .. }
-            | Error::InitRecordbatchStream { .. } => StatusCode::Internal,
+            | Error::InitRecordbatchStream { .. }
+            | Error::ColumnNotExists { .. } => StatusCode::Internal,

             Error::External { source } => source.status_code(),

-            Error::SchemaConversion { source, .. } => source.status_code(),
+            Error::SchemaConversion { source, .. } | Error::CastVector { source, .. } => {
+                source.status_code()
+            }
         }
     }

-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -12,14 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
+
 use datatypes::schema::SchemaRef;
 use datatypes::value::Value;
 use datatypes::vectors::{Helper, VectorRef};
 use serde::ser::{Error, SerializeStruct};
 use serde::{Serialize, Serializer};
-use snafu::ResultExt;
+use snafu::{OptionExt, ResultExt};

-use crate::error::{self, Result};
+use crate::error::{self, CastVectorSnafu, ColumnNotExistsSnafu, Result};
 use crate::DfRecordBatch;

 /// A two-dimensional batch of column-oriented data with a defined schema.
@@ -108,6 +110,41 @@ impl RecordBatch {
     pub fn rows(&self) -> RecordBatchRowIterator<'_> {
         RecordBatchRowIterator::new(self)
     }
+
+    pub fn column_vectors(
+        &self,
+        table_name: &str,
+        table_schema: SchemaRef,
+    ) -> Result<HashMap<String, VectorRef>> {
+        let mut vectors = HashMap::with_capacity(self.num_columns());
+
+        // column schemas in recordbatch must match its vectors, otherwise it's corrupted
+        for (vector_schema, vector) in self.schema.column_schemas().iter().zip(self.columns.iter())
+        {
+            let column_name = &vector_schema.name;
+            let column_schema =
+                table_schema
+                    .column_schema_by_name(column_name)
+                    .context(ColumnNotExistsSnafu {
+                        table_name,
+                        column_name,
+                    })?;
+            let vector = if vector_schema.data_type != column_schema.data_type {
+                vector
+                    .cast(&column_schema.data_type)
+                    .with_context(|_| CastVectorSnafu {
+                        from_type: vector.data_type(),
+                        to_type: column_schema.data_type.clone(),
+                    })?
+            } else {
+                vector.clone()
+            };
+
+            vectors.insert(column_name.clone(), vector);
+        }
+
+        Ok(vectors)
+    }
 }

 impl Serialize for RecordBatch {
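A usage sketch for the new `column_vectors` helper: it returns the batch's columns keyed by name, casting each vector to the table schema's type whenever the batch's own type differs. The table name here is hypothetical and only feeds error messages:

    // `batch: RecordBatch`, `table_schema: SchemaRef`, as in the signature above.
    let vectors: HashMap<String, VectorRef> =
        batch.column_vectors("my_table", table_schema)?;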
@@ -5,6 +5,7 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
 async-trait.workspace = true
+common-error = { path = "../error" }
 common-telemetry = { path = "../telemetry" }
 metrics = "0.20"
@@ -12,6 +13,7 @@ once_cell = "1.12"
 paste.workspace = true
 snafu.workspace = true
 tokio.workspace = true
+tokio-util.workspace = true

 [dev-dependencies]
 tokio-test = "0.4"
@@ -15,6 +15,8 @@
 use std::any::Any;

 use common_error::prelude::*;
+use snafu::Location;
+use tokio::task::JoinError;

 pub type Result<T> = std::result::Result<T, Error>;

@@ -24,16 +26,33 @@ pub enum Error {
     #[snafu(display("Failed to build runtime, source: {}", source))]
     BuildRuntime {
         source: std::io::Error,
-        backtrace: Backtrace,
+        location: Location,
     },
+
+    #[snafu(display("Repeated task {} not started yet", name))]
+    IllegalState { name: String, location: Location },
+
+    #[snafu(display(
+        "Failed to wait for repeated task {} to stop, source: {}",
+        name,
+        source
+    ))]
+    WaitGcTaskStop {
+        name: String,
+        source: JoinError,
+        location: Location,
+    },
 }

 impl ErrorExt for Error {
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
     fn as_any(&self) -> &dyn Any {
         self
     }
+
+    fn location_opt(&self) -> Option<common_error::snafu::Location> {
+        match self {
+            Error::BuildRuntime { location, .. }
+            | Error::IllegalState { location, .. }
+            | Error::WaitGcTaskStop { location, .. } => Some(*location),
+        }
+    }
 }
@@ -14,7 +14,8 @@

 pub mod error;
 mod global;
-pub mod metric;
+mod metrics;
+mod repeated_task;
 pub mod runtime;

 pub use global::{
@@ -23,4 +24,5 @@ pub use global::{
     spawn_read, spawn_write, write_runtime,
 };

+pub use crate::repeated_task::{RepeatedTask, TaskFunction, TaskFunctionRef};
 pub use crate::runtime::{Builder, JoinError, JoinHandle, Runtime};
src/common/runtime/src/repeated_task.rs (new file, 174 lines)
@@ -0,0 +1,174 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;

use common_error::prelude::ErrorExt;
use common_telemetry::logging;
use snafu::{ensure, OptionExt, ResultExt};
use tokio::sync::Mutex;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;

use crate::error::{IllegalStateSnafu, Result, WaitGcTaskStopSnafu};
use crate::Runtime;

#[async_trait::async_trait]
pub trait TaskFunction<E: ErrorExt> {
    async fn call(&self) -> std::result::Result<(), E>;
    fn name(&self) -> &str;
}

pub type TaskFunctionRef<E> = Arc<dyn TaskFunction<E> + Send + Sync>;

pub struct RepeatedTask<E> {
    cancel_token: Mutex<Option<CancellationToken>>,
    task_handle: Mutex<Option<JoinHandle<()>>>,
    started: AtomicBool,
    interval: Duration,
    task_fn: TaskFunctionRef<E>,
}

impl<E: ErrorExt> std::fmt::Display for RepeatedTask<E> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "RepeatedTask({})", self.task_fn.name())
    }
}

impl<E: ErrorExt> std::fmt::Debug for RepeatedTask<E> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_tuple("RepeatedTask")
            .field(&self.task_fn.name())
            .finish()
    }
}

impl<E: ErrorExt + 'static> RepeatedTask<E> {
    pub fn new(interval: Duration, task_fn: TaskFunctionRef<E>) -> Self {
        Self {
            cancel_token: Mutex::new(None),
            task_handle: Mutex::new(None),
            started: AtomicBool::new(false),
            interval,
            task_fn,
        }
    }

    pub fn started(&self) -> bool {
        self.started.load(Ordering::Relaxed)
    }

    pub async fn start(&self, runtime: Runtime) -> Result<()> {
        let token = CancellationToken::new();
        let interval = self.interval;
        let child = token.child_token();
        let task_fn = self.task_fn.clone();
        // TODO(hl): Maybe spawn to a blocking runtime.
        let handle = runtime.spawn(async move {
            loop {
                tokio::select! {
                    _ = tokio::time::sleep(interval) => {}
                    _ = child.cancelled() => {
                        return;
                    }
                }
                if let Err(e) = task_fn.call().await {
                    logging::error!(e; "Failed to run repeated task: {}", task_fn.name());
                }
            }
        });
        *self.cancel_token.lock().await = Some(token);
        *self.task_handle.lock().await = Some(handle);
        self.started.store(true, Ordering::Relaxed);

        logging::debug!(
            "Repeated task {} started with interval: {:?}",
            self.task_fn.name(),
            self.interval
        );

        Ok(())
    }

    pub async fn stop(&self) -> Result<()> {
        let name = self.task_fn.name();
        ensure!(
            self.started
                .compare_exchange(true, false, Ordering::Relaxed, Ordering::Relaxed)
                .is_ok(),
            IllegalStateSnafu { name }
        );
        let token = self
            .cancel_token
            .lock()
            .await
            .take()
            .context(IllegalStateSnafu { name })?;
        let handle = self
            .task_handle
            .lock()
            .await
            .take()
            .context(IllegalStateSnafu { name })?;

        token.cancel();
        handle.await.context(WaitGcTaskStopSnafu { name })?;

        logging::debug!("Repeated task {} stopped", name);
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use std::sync::atomic::AtomicI32;

    use super::*;

    struct TickTask {
        n: AtomicI32,
    }

    #[async_trait::async_trait]
    impl TaskFunction<crate::error::Error> for TickTask {
        fn name(&self) -> &str {
            "test"
        }

        async fn call(&self) -> Result<()> {
            self.n.fetch_add(1, Ordering::Relaxed);

            Ok(())
        }
    }

    #[tokio::test]
    async fn test_repeated_task() {
        common_telemetry::init_default_ut_logging();

        let task_fn = Arc::new(TickTask {
            n: AtomicI32::new(0),
        });

        let task = RepeatedTask::new(Duration::from_millis(100), task_fn.clone());

        task.start(crate::bg_runtime()).await.unwrap();
        tokio::time::sleep(Duration::from_millis(550)).await;
        task.stop().await.unwrap();

        assert_eq!(task_fn.n.load(Ordering::Relaxed), 5);
    }
}
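A usage sketch for RepeatedTask, condensed from the unit test above: wrap a `TaskFunction` in an `Arc`, start it on a runtime, and call `stop` to cancel the token and join the spawned loop.

    // `TickTask` and `crate::bg_runtime()` are those used in the test above.
    let task_fn = Arc::new(TickTask { n: AtomicI32::new(0) });
    let task = RepeatedTask::new(Duration::from_millis(100), task_fn);
    task.start(crate::bg_runtime()).await?;
    // ... some time later ...
    task.stop().await?; // cancels the CancellationToken and awaits the JoinHandle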
@@ -24,7 +24,7 @@ use tokio::sync::oneshot;
 pub use tokio::task::{JoinError, JoinHandle};

 use crate::error::*;
-use crate::metric::*;
+use crate::metrics::*;

 /// A runtime to run future tasks
 #[derive(Clone, Debug)]
@@ -581,7 +581,7 @@ pub fn expression_from_df_expr(
         | Expr::ScalarSubquery(..)
         | Expr::Placeholder { .. }
         | Expr::QualifiedWildcard { .. } => todo!(),
-        Expr::GroupingSet(_) => UnsupportedExprSnafu {
+        Expr::GroupingSet(_) | Expr::OuterReferenceColumn(_, _) => UnsupportedExprSnafu {
            name: expr.to_string(),
         }
         .fail()?,
@@ -752,6 +752,14 @@ mod utils {
             BuiltinScalarFunction::Uuid => "uuid",
             BuiltinScalarFunction::Struct => "struct",
             BuiltinScalarFunction::ArrowTypeof => "arrow_type_of",
+            BuiltinScalarFunction::Acosh => "acosh",
+            BuiltinScalarFunction::Asinh => "asinh",
+            BuiltinScalarFunction::Atanh => "atanh",
+            BuiltinScalarFunction::Cbrt => "cbrt",
+            BuiltinScalarFunction::Cosh => "cosh",
+            BuiltinScalarFunction::Pi => "pi",
+            BuiltinScalarFunction::Sinh => "sinh",
+            BuiltinScalarFunction::Tanh => "tanh",
         }
     }
 }
@@ -22,9 +22,10 @@ use catalog::CatalogManagerRef;
 use common_catalog::format_full_table_name;
 use common_telemetry::debug;
 use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
-use datafusion::common::{DFField, DFSchema, OwnedTableReference};
+use datafusion::common::{DFField, DFSchema};
 use datafusion::datasource::DefaultTableSource;
 use datafusion::physical_plan::project_schema;
+use datafusion::sql::TableReference;
 use datafusion_expr::{Filter, LogicalPlan, TableScan};
 use prost::Message;
 use session::context::QueryContext;
@@ -240,13 +241,13 @@ impl DFLogicalSubstraitConvertor {
             .projection
             .map(|mask_expr| self.convert_mask_expression(mask_expr));

-        let table_ref = OwnedTableReference::Full {
-            catalog: catalog_name.clone(),
-            schema: schema_name.clone(),
-            table: table_name.clone(),
-        };
+        let table_ref = TableReference::full(
+            catalog_name.clone(),
+            schema_name.clone(),
+            table_name.clone(),
+        );
         let adapter = table_provider
-            .resolve_table(table_ref)
+            .resolve_table(table_ref.clone())
             .await
             .with_context(|_| ResolveTableSnafu {
                 table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
@@ -272,14 +273,13 @@ impl DFLogicalSubstraitConvertor {
         };

         // Calculate the projected schema
-        let qualified = &format_full_table_name(&catalog_name, &schema_name, &table_name);
         let projected_schema = Arc::new(
             project_schema(&stored_schema, projection.as_ref())
                 .and_then(|x| {
                     DFSchema::new_with_metadata(
                         x.fields()
                             .iter()
-                            .map(|f| DFField::from_qualified(qualified, f.clone()))
+                            .map(|f| DFField::from_qualified(table_ref.clone(), f.clone()))
                             .collect(),
                         x.metadata().clone(),
                     )
@@ -291,7 +291,7 @@ impl DFLogicalSubstraitConvertor {

         // TODO(ruihang): Support limit(fetch)
         Ok(LogicalPlan::TableScan(TableScan {
-            table_name: qualified.to_string(),
+            table_name: table_ref,
             source: adapter,
             projection,
             projected_schema,
@@ -398,7 +398,6 @@ impl DFLogicalSubstraitConvertor {
             | LogicalPlan::CreateCatalog(_)
             | LogicalPlan::DropView(_)
             | LogicalPlan::Distinct(_)
-            | LogicalPlan::SetVariable(_)
             | LogicalPlan::CreateExternalTable(_)
             | LogicalPlan::CreateMemoryTable(_)
             | LogicalPlan::DropTable(_)
@@ -409,7 +408,8 @@ impl DFLogicalSubstraitConvertor {
             | LogicalPlan::Prepare(_)
            | LogicalPlan::Dml(_)
             | LogicalPlan::DescribeTable(_)
-            | LogicalPlan::Unnest(_) => InvalidParametersSnafu {
+            | LogicalPlan::Unnest(_)
+            | LogicalPlan::Statement(_) => InvalidParametersSnafu {
                 reason: format!(
                     "Trying to convert DDL/DML plan to substrait proto, plan: {plan:?}",
                 ),
@@ -535,10 +535,11 @@ fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> b
 mod test {
     use catalog::local::{LocalCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
     use catalog::{CatalogList, CatalogProvider, RegisterTableRequest};
-    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
     use datafusion::common::{DFSchema, ToDFSchema};
     use datafusion_expr::TableSource;
     use datatypes::schema::RawSchema;
+    use table::engine::manager::MemoryTableEngineManager;
     use table::requests::CreateTableRequest;
     use table::test_util::{EmptyTable, MockTableEngine};

@@ -549,11 +550,11 @@ mod test {

     async fn build_mock_catalog_manager() -> CatalogManagerRef {
         let mock_table_engine = Arc::new(MockTableEngine::new());
-        let catalog_manager = Arc::new(
-            LocalCatalogManager::try_new(mock_table_engine)
-                .await
-                .unwrap(),
-        );
+        let engine_manager = Arc::new(MemoryTableEngineManager::alias(
+            MITO_ENGINE.to_string(),
+            mock_table_engine.clone(),
+        ));
+        let catalog_manager = Arc::new(LocalCatalogManager::try_new(engine_manager).await.unwrap());
         let schema_provider = Arc::new(MemorySchemaProvider::new());
         let catalog_provider = Arc::new(MemoryCatalogProvider::new());
         catalog_provider
@@ -579,6 +580,7 @@ mod test {
             primary_key_indices: vec![],
             create_if_not_exists: true,
             table_options: Default::default(),
+            engine: MITO_ENGINE.to_string(),
         }
     }

@@ -620,10 +622,13 @@ mod test {
         let projected_schema =
             Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());

+        let table_name = TableReference::full(
+            DEFAULT_CATALOG_NAME,
+            DEFAULT_SCHEMA_NAME,
+            DEFAULT_TABLE_NAME,
+        );
         let table_scan_plan = LogicalPlan::TableScan(TableScan {
-            table_name: format!(
-                "{DEFAULT_CATALOG_NAME}.{DEFAULT_SCHEMA_NAME}.{DEFAULT_TABLE_NAME}",
-            ),
+            table_name,
             source: adapter,
             projection: Some(projection),
             projected_schema,
(Some files were not shown because too many files have changed in this diff.)